/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * kanji_conv.c
 *
 * Copyright (C) 2000 Takuo Kitame <kitame@gnome.gr.jp>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */

#include "common.h"
#include "gnomeicu.h"
#include <iconv.h>
#include "icuchat.h"
#include "kanji_conv.h"

#define ESC          0x1b
#define SS2          0x8e

/****************************************************************************/
/* Japanese string code detector */
/****************************************************************************/
/*
 * Guess which encoding the NUL-terminated byte string STR uses.
 *
 * Returns one of KC_JIS, KC_SJIS, KC_EUC or KC_ASCII.
 *
 * Pass 1 walks the string looking for sequences that settle the answer
 * immediately:
 *   - ESC '$' followed by 'B' or '@'                       -> KC_JIS
 *   - a lead byte in 0x81-0x8d or 0x8f-0x9f                -> KC_SJIS
 *   - SS2 followed by 0x40-0x7e, 0x80-0xa0 or 0xe0-0xfc    -> KC_SJIS
 *   - a lead byte in 0xa1-0xdf followed by 0xa0 or
 *     0xe0-0xfe                                            -> KC_EUC
 *   - a lead byte in 0xf0-0xfe                             -> KC_EUC
 *
 * When pass 1 is inconclusive, two further passes count the byte pairs that
 * fit the Shift_JIS ranges and the EUC ranges.  More Shift_JIS pairs than
 * EUC pairs gives KC_SJIS; otherwise any EUC pair gives KC_EUC; with no
 * pairs at all the string is reported as KC_ASCII.
 */
static int
detect_kanji(const char *str)
{
    const unsigned char *ptr = (const unsigned char *)str;
    int c, c1, c2;
    int euc_c = 0, sjis_c = 0;

    /* Pass 1: sequences that identify the encoding on their own. */
    while ((c = *ptr) != '\0') {
        if (c == ESC) {
            if ((c = *(++ptr)) == '\0')
                break;
            if (c == '$') {
                if ((c = *(++ptr)) == '\0')
                    break;
                if (c == 'B' || c == '@')
                    return KC_JIS;
            }
            ptr++;
            continue;
        }

        if ((c >= 0x81 && c <= 0x8d) || (c >= 0x8f && c <= 0x9f))
            return KC_SJIS;

        if (c == SS2) {
            if ((c = *(++ptr)) == '\0')
                break;
            if ((c >= 0x40 && c <= 0x7e) ||
                (c >= 0x80 && c <= 0xa0) ||
                (c >= 0xe0 && c <= 0xfc))
                return KC_SJIS;
            /* Stop scanning here and let the counting passes decide. */
            if (c >= 0xa1 && c <= 0xdf)
                break;

            ptr++;
            continue;
        }

        if (c >= 0xa1 && c <= 0xdf) {
            if ((c = *(++ptr)) == '\0')
                break;
            if (c == 0xa0 || (c >= 0xe0 && c <= 0xfe))
                return KC_EUC;
            /* Any other second byte is inconclusive: skip the pair. */
            ptr++;
            continue;
        }

        if (c >= 0xf0 && c <= 0xfe)
            return KC_EUC;

        ptr++;
    }

    /*
     * Pass 2: count pairs (previous byte c2, current byte c1) that fit the
     * Shift_JIS ranges.  After a hit the following byte is skipped; if that
     * byte is the terminator the scan ends, so we never read past the end
     * of the string.
     */
    ptr = (const unsigned char *)str;
    c2 = 0;
    while ((c1 = *ptr++) != '\0') {
        if (((c2 >  0x80 && c2 < 0xa0) || (c2 >= 0xe0 && c2 < 0xfd)) &&
            ((c1 >= 0x40 && c1 < 0x7f) || (c1 >= 0x80 && c1 < 0xfd))) {
            sjis_c++;
            if (*ptr == '\0')
                break;
            c1 = *ptr++;
        }
        c2 = c1;
    }

    /* Pass 3: same walk, counting pairs where both bytes are 0xa1-0xfe. */
    ptr = (const unsigned char *)str;
    c2 = 0;
    while ((c1 = *ptr++) != '\0') {
        if ((c2 > 0xa0 && c2 < 0xff) &&
            (c1 > 0xa0 && c1 < 0xff)) {
            euc_c++;
            if (*ptr == '\0')
                break;
            c1 = *ptr++;
        }
        c2 = c1;
    }

    if (sjis_c > euc_c)
        return KC_SJIS;
    if (euc_c > 0)
        return KC_EUC;
    return KC_ASCII;
}

/*
 * Convert STR to the DSTSET encoding, detecting the source encoding with
 * detect_kanji().
 *
 * Returns a newly allocated string that the caller must release, or NULL
 * when STR is NULL or an allocation fails.  A plain copy of STR is returned
 * when toggles->kanji is off, when no Japanese encoding is detected, or when
 * iconv_open() cannot provide the requested converter.  If iconv() stops on
 * an invalid or incomplete sequence, the output produced up to that point
 * is returned.
 *
 * Without HAVE_ICONV the function only duplicates STR.
 */
char *
kanji_conv_auto(const char *str, const char *dstset)
#ifdef HAVE_ICONV
{
    char *buf, *ret;
    iconv_t cd;
    size_t len;
    size_t insize;
    size_t outsize;
    char *inptr;
    char *outptr;
    const char *srcset = "";

    if(!str)
        return NULL;

    if(!toggles->kanji) return strdup(str);

    switch (detect_kanji(str)) {
    case KC_EUC:
        srcset = KANJI_EUC;
        break;
    case KC_JIS:
        srcset = KANJI_JIS;
        break;
    case KC_SJIS:
        srcset = KANJI_SJIS;
        break;
    case KC_ASCII:
    default:
        return strdup(str);
    }

#ifdef TRACE_FUNCTION
    g_print("kanji_conv_auto (%s), %s to ", str, srcset);
#endif

    /* Open the converter before allocating so a failure leaks nothing;
     * an unusable descriptor must never reach iconv(). */
    cd = iconv_open(dstset, srcset);
    if(cd == (iconv_t) -1)
        return strdup(str);

    /* Four output bytes per input byte, plus the terminator. */
    len = strlen(str);
    buf = malloc(len * 4 + 1);
    if(!buf) {
        iconv_close(cd);
        return NULL;
    }

    insize = len;
    inptr = (char *)str;   /* iconv() takes char **; STR is not modified */
    outsize = len * 4;
    outptr = buf;

    /* On success, a second call with a NULL input emits whatever the
     * target encoding needs to return to its initial shift state. */
    if(iconv(cd, &inptr, &insize, &outptr, &outsize) != (size_t) -1)
        iconv(cd, NULL, NULL, &outptr, &outsize);

    *outptr = '\0';
    iconv_close(cd);

    /* Hand back a copy sized to the actual result. */
    ret = strdup(buf);
    free(buf);

#ifdef TRACE_FUNCTION
    g_print("%s (%s)\n", dstset, ret);
#endif

    return ret;
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif
/*
 * Convert STR to the encoding of the current locale, detecting the source
 * encoding automatically (via kanji_conv_auto()).
 *
 * The target encoding is worked out on the first call that gets this far
 * and then cached: the LC_CTYPE locale name returned by
 * setlocale(LC_CTYPE, "") is compared case-insensitively with the known
 * Japanese locale names below.  KANJI_EUC is used when nothing matches or
 * when setlocale() fails.  Note that setlocale(LC_CTYPE, "") also sets the
 * program's LC_CTYPE locale from the environment.
 *
 * Returns a newly allocated string that the caller must release, or NULL
 * when STR is NULL.  A plain copy is returned when toggles->kanji is off or
 * when built without HAVE_ICONV.
 */
char *
kanji_conv_to_locale(const char *str)
#ifdef HAVE_ICONV
{
   static const char *jpcode = NULL;

   static const char *const locale_euc[]  = { "ja", "ja_JP", "ja_JP.ujis", "ja_JP.EUC", "ja_JP.eucJP", "ja_JP.eucjp", NULL };
   static const char *const locale_jis[]  = { "ja_JP.JIS", "ja_JP.jis", "ja_JP.iso-2022-jp", NULL };
   static const char *const locale_sjis[] = { "ja_JP.SJIS", "ja_JP.sjis", NULL };

   static const struct LOCALE_TABLE {
       const char *code;
       const char *const *name_list;
   } locale_table[] = {
       {KANJI_EUC, locale_euc},
       {KANJI_JIS, locale_jis},
       {KANJI_SJIS, locale_sjis}
   };

   if(!str)
       return NULL;

   if(!toggles->kanji) return strdup(str);

   if(jpcode == NULL) {
       /* setlocale() returns NULL when the request cannot be honoured. */
       const char *ctype = setlocale(LC_CTYPE, "");
       const size_t ntables = sizeof(locale_table) / sizeof(locale_table[0]);
       size_t i, j;

       for(j = 0; ctype != NULL && jpcode == NULL && j < ntables; j++) {
           const char *const *name = locale_table[j].name_list;
           for(i = 0; name[i]; i++) {
               if(strcasecmp(ctype, name[i]) == 0) {
                   jpcode = locale_table[j].code;
                   break;
               }
           }
       }
       if(jpcode == NULL)
           jpcode = KANJI_EUC;
   }

   return kanji_conv_auto(str, jpcode);
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif
/*
 * Convert STR from the SRCSET encoding to the DSTSET encoding.
 *
 * Returns a newly allocated string that the caller must release, or NULL
 * when STR is NULL or an allocation fails.  A plain copy of STR is returned
 * when toggles->kanji is off or when iconv_open() cannot provide the
 * requested converter.  If iconv() stops on an invalid or incomplete
 * sequence, the output produced up to that point is returned.
 *
 * Without HAVE_ICONV the function only duplicates STR.
 */
char *
kanji_conv(const char *str, const char *dstset, const char *srcset)
#ifdef HAVE_ICONV
{
    char *buf, *ret;
    iconv_t cd;
    size_t len;
    size_t insize;
    size_t outsize;
    char *inptr;
    char *outptr;

    if(!str)
        return NULL;

    if(!toggles->kanji) return strdup(str);

#ifdef TRACE_FUNCTION
    g_print("kanji_conv (%s), %s to ", str, srcset);
#endif

    /* Open the converter before allocating so a failure leaks nothing;
     * an unusable descriptor must never reach iconv(). */
    cd = iconv_open (dstset, srcset);
    if(cd == (iconv_t) -1)
        return strdup(str);

    /* Four output bytes per input byte, plus the terminator. */
    len = strlen(str);
    buf = malloc(len * 4 + 1);
    if(!buf) {
        iconv_close(cd);
        return NULL;
    }

    insize = len;
    inptr = (char *)str;   /* iconv() takes char **; STR is not modified */
    outsize = len * 4;
    outptr = buf;

    /* On success, a second call with a NULL input emits whatever the
     * target encoding needs to return to its initial shift state. */
    if(iconv (cd, &inptr, &insize, &outptr, &outsize) != (size_t) -1)
        iconv (cd, NULL, NULL, &outptr, &outsize);

    *outptr = '\0';
    iconv_close(cd);

    /* Hand back a copy sized to the actual result. */
    ret = strdup(buf);
    free(buf);

#ifdef TRACE_FUNCTION
    g_print(" %s (%s).\n", ret, dstset);
#endif

    return ret;
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif

/* easy wrapper */
/*
 * In-place variant of kanji_conv_auto(): replaces *STR with its conversion
 * to DESTSET and releases the previous value with g_free().
 *
 * Nothing happens when STR is NULL, when toggles->kanji is off, when the
 * conversion returns NULL (the original string is kept rather than lost),
 * or when built without HAVE_ICONV.
 *
 * NOTE(review): the replacement string comes from strdup() while the old one
 * is released with g_free(); confirm callers may release *STR with either.
 */
void
kanji_conv_auto_s(char **str, const char *destset)
{
#ifdef HAVE_ICONV
    char *ret;

    if(!str || !toggles->kanji) return;

    /* ret is a freshly allocated copy, or NULL on failure */
    ret = kanji_conv_auto(*str, destset);
    if(!ret)
        return;

    g_free(*str);
    *str = ret;
#else
    (void)str;
    (void)destset;
#endif
}

/*
 * In-place variant of kanji_conv_to_locale(): replaces *STR with its
 * conversion to the locale encoding and releases the previous value with
 * g_free().
 *
 * Nothing happens when STR is NULL, when toggles->kanji is off, when the
 * conversion returns NULL (the original string is kept rather than lost),
 * or when built without HAVE_ICONV.
 *
 * NOTE(review): the replacement string comes from strdup() while the old one
 * is released with g_free(); confirm callers may release *STR with either.
 */
void
kanji_conv_to_locale_s(char **str)
{
#ifdef HAVE_ICONV
    char *ret;

    if(!str || !toggles->kanji) return;

    /* ret is a freshly allocated copy, or NULL on failure */
    ret = kanji_conv_to_locale(*str);
    if(!ret)
        return;

    g_free(*str);
    *str = ret;
#else
    (void)str;
#endif
}
