/*****************************************************************************
 * subsdec.c : text subtitles decoder
 *****************************************************************************
 * Copyright (C) 2000-2001 VideoLAN
 * $Id$
 *
 * Authors: Gildas Bazin <gbazin@videolan.org>
 *          Samuel Hocevar <sam@zoy.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

/*****************************************************************************
 * Preamble
 *****************************************************************************/
#include <vlc/vlc.h>
#include <vlc/vout.h>
#include <vlc/decoder.h>

#include "osd.h"
#include "vlc_filter.h"

#if defined(HAVE_ICONV)
#include <iconv.h>
#endif

#include "charset.h"

/*****************************************************************************
 * decoder_sys_t : decoder descriptor
 *****************************************************************************/
struct decoder_sys_t
{
    int                 i_align;          /* Subtitles alignment on the vout */

#if defined(HAVE_ICONV)
    iconv_t             iconv_handle;            /* handle to iconv instance */
#endif

};

/*****************************************************************************
 * Local prototypes
 *****************************************************************************/
static int  OpenDecoder   ( vlc_object_t * );
static void CloseDecoder  ( vlc_object_t * );

static subpicture_t *DecodeBlock   ( decoder_t *, block_t ** );
static subpicture_t *ParseText     ( decoder_t *, block_t * );
static void         StripTags      ( char * );

#define DEFAULT_NAME "System Default"

/*****************************************************************************
 * Module descriptor.
 *****************************************************************************/
#if defined(HAVE_ICONV)
static char *ppsz_encodings[] = { DEFAULT_NAME, "ASCII", "UTF-8", "",
    "ISO-8859-1", "CP1252", "MacRoman", "MacIceland","ISO-8859-15", "",
    "ISO-8859-2", "CP1250", "MacCentralEurope", "MacCroatian", "MacRomania", "",
    "ISO-8859-5", "CP1251", "MacCyrillic", "MacUkraine", "KOI8-R", "KOI8-U", "KOI8-RU", "",
    "ISO-8859-6", "CP1256", "MacArabic", "",
    "ISO-8859-7", "CP1253", "MacGreek", "",
    "ISO-8859-8", "CP1255", "MacHebrew", "",
    "ISO-8859-9", "CP1254", "MacTurkish", "",
    "ISO-8859-13", "CP1257", "",
    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", "",
    "ISO-2022-CN", "ISO-2022-CN-EXT", "EUC-CN", "EUC-TW", "BIG5", "BIG5-HKSCS", "",
    "ISO-2022-KR", "EUC-KR", "",
    "MacThai", "KOI8-T", "",
    "ISO-8859-3", "ISO-8859-4", "ISO-8859-10", "ISO-8859-14", "ISO-8859-16", "",
    "CP850", "CP862", "CP866", "CP874", "CP932", "CP949", "CP950", "CP1133", "CP1258", "",
    "Macintosh", "",
    "UTF-7", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
    "C99", "JAVA", "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4", "UCS-4BE", "UCS-4LE", "",
    "HZ", "GBK", "GB18030", "JOHAB", "ARMSCII-8",
    "Georgian-Academy", "Georgian-PS", "TIS-620", "MuleLao-1", "VISCII", "TCVN",
    "HPROMAN8", "NEXTSTEP" };
#endif

static int  pi_justification[] = { 0, 1, 2 };
static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};

#define ENCODING_TEXT N_("Subtitles text encoding")
#define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
#define ALIGN_TEXT N_("Subtitles justification")
#define ALIGN_LONGTEXT N_("Set the justification of subtitles")

vlc_module_begin();
    set_description( _("text subtitles decoder") );
    set_capability( "decoder", 50 );
    set_callbacks( OpenDecoder, CloseDecoder );

    add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT,
                 VLC_TRUE );
        change_integer_list( pi_justification, ppsz_justification_text, 0 );
#if defined(HAVE_ICONV)
    add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
                ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
        change_string_list( ppsz_encodings, 0, 0 );
#endif
vlc_module_end();

/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Tries to launch a decoder and return score so that the interface is able
 * to chose.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t     *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;
    vlc_value_t val;

    if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
        p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
    {
        return VLC_EGENERIC;
    }

    p_dec->pf_decode_sub = DecodeBlock;

    /* Allocate the memory needed to store the decoder's structure */
    if( ( p_dec->p_sys = p_sys =
          (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
    {
        msg_Err( p_dec, "out of memory" );
        return VLC_EGENERIC;
    }

    var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
    var_Get( p_dec, "subsdec-align", &val );
    p_sys->i_align = val.i_int;

#if defined(HAVE_ICONV)
    if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        msg_Dbg( p_dec, "using character encoding: %s",
                 p_dec->fmt_in.subs.psz_encoding );
        p_sys->iconv_handle =
            iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
    }
    else
    {
        var_Create( p_dec, "subsdec-encoding",
                    VLC_VAR_STRING | VLC_VAR_DOINHERIT );
        var_Get( p_dec, "subsdec-encoding", &val );
        if( !strcmp( val.psz_string, DEFAULT_NAME ) )
        {
            char *psz_charset =(char*)malloc( 100 );
            vlc_current_charset( &psz_charset );
            p_sys->iconv_handle = iconv_open( "UTF-8", psz_charset );
            msg_Dbg( p_dec, "using character encoding: %s", psz_charset );
            free( psz_charset );
        }
        else if( val.psz_string )
        {
            msg_Dbg( p_dec, "using character encoding: %s", val.psz_string );
            p_sys->iconv_handle = iconv_open( "UTF-8", val.psz_string );
        }

        if( p_sys->iconv_handle == (iconv_t)-1 )
        {
            msg_Warn( p_dec, "unable to do requested conversion" );
        }

        if( val.psz_string ) free( val.psz_string );
    }
#else

    msg_Dbg( p_dec, "no iconv support available" );
#endif

    return VLC_SUCCESS;
}

/****************************************************************************
 * DecodeBlock: the whole thing
 ****************************************************************************
 * This function must be fed with complete subtitles units.
 ****************************************************************************/
static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
{
    subpicture_t *p_spu;

    if( !pp_block || *pp_block == NULL ) return NULL;

    p_spu = ParseText( p_dec, *pp_block );

    block_Release( *pp_block );
    *pp_block = NULL;

    return p_spu;
}

/*****************************************************************************
 * CloseDecoder: clean up the decoder
 *****************************************************************************/
static void CloseDecoder( vlc_object_t *p_this )
{
    decoder_t *p_dec = (decoder_t *)p_this;
    decoder_sys_t *p_sys = p_dec->p_sys;

#if defined(HAVE_ICONV)
    if( p_sys->iconv_handle != (iconv_t)-1 )
    {
        iconv_close( p_sys->iconv_handle );
    }
#endif

    free( p_sys );
}

/*****************************************************************************
 * ParseText: parse an text subtitle packet and send it to the video output
 *****************************************************************************/
static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
{
    decoder_sys_t *p_sys = p_dec->p_sys;
    subpicture_t *p_spu = 0;
    char *psz_subtitle;
    int i_align_h, i_align_v;
    video_format_t fmt;

    /* We cannot display a subpicture with no date */
    if( p_block->i_pts == 0 )
    {
        msg_Warn( p_dec, "subtitle without a date" );
        return NULL;
    }

    /* Check validity of packet data */
    if( p_block->i_buffer <= 1 || p_block->p_buffer[0] == '\0' )
    {
        msg_Warn( p_dec, "empty subtitle" );
        return NULL;
    }

    /* Should be resiliant against bad subtitles */
    psz_subtitle = strndup( p_block->p_buffer, p_block->i_buffer );

    i_align_h = p_sys->i_align ? 20 : 0;
    i_align_v = 10;

#if defined(HAVE_ICONV)
    if( p_sys->iconv_handle != (iconv_t)-1 )
    {
        char *psz_new_subtitle;
        char *psz_convert_buffer_out;
        char *psz_convert_buffer_in;
        size_t ret, inbytes_left, outbytes_left;

        psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
        psz_convert_buffer_out = psz_new_subtitle;
        psz_convert_buffer_in = psz_subtitle;
        inbytes_left = strlen( psz_subtitle );
        outbytes_left = 6 * inbytes_left;
        ret = iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
                     &inbytes_left, &psz_convert_buffer_out, &outbytes_left );
        *psz_convert_buffer_out = '\0';

        if( inbytes_left )
        {
            msg_Warn( p_dec, "Failed to convert subtitle encoding, "
                      "dropping subtitle.\nTry setting a different "
                      "character-encoding for the subtitle." );
            free( psz_subtitle );
            return NULL;
        }
        else
        {
            free( psz_subtitle );
            psz_subtitle = psz_new_subtitle;
        }
    }
#endif

    if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') )
    {
        /* Decode SSA strings */
        /* We expect: ReadOrder, Layer, Style, Name, MarginL, MarginR,
         * MarginV, Effect, Text */
        char *psz_new_subtitle;
        char *psz_buffer_sub;
        int         i_comma;
        int         i_text;

        psz_buffer_sub = psz_subtitle;
        for( ;; )
        {
            i_comma = 0;
            while( i_comma < 8 &&
                *psz_buffer_sub != '\0' )
            {
                if( *psz_buffer_sub == ',' )
                {
                    i_comma++;
                }
                psz_buffer_sub++;
            }
            psz_new_subtitle = malloc( strlen( psz_buffer_sub ) + 1);
            i_text = 0;
            while( psz_buffer_sub[0] != '\0' )
            {
                if( psz_buffer_sub[0] == '\\' && ( psz_buffer_sub[1] == 'n' ||
                    psz_buffer_sub[1] == 'N' ) )
                {
                    psz_new_subtitle[i_text] = '\n';
                    i_text++;
                    psz_buffer_sub += 2;
                }
                else if( psz_buffer_sub[0] == '{' &&
                         psz_buffer_sub[1] == '\\' )
                {
                    /* SSA control code */
                    while( psz_buffer_sub[0] != '\0' &&
                           psz_buffer_sub[0] != '}' )
                    {
                        psz_buffer_sub++;
                    }
                    psz_buffer_sub++;
                }
                else
                {
                    psz_new_subtitle[i_text] = psz_buffer_sub[0];
                    i_text++;
                    psz_buffer_sub++;
                }
            }
            psz_new_subtitle[i_text] = '\0';
            free( psz_subtitle );
            psz_subtitle = psz_new_subtitle;
            break;
        }
    }

    StripTags( psz_subtitle );

    p_spu = p_dec->pf_spu_buffer_new( p_dec );
    if( !p_spu )
    {
        msg_Warn( p_dec, "can't get spu buffer" );
        free( psz_subtitle );
        return 0;
    }

    /* Create a new subpicture region */
    memset( &fmt, 0, sizeof(video_format_t) );
    fmt.i_chroma = VLC_FOURCC('T','E','X','T');
    fmt.i_aspect = 0;
    fmt.i_width = fmt.i_height = 0;
    fmt.i_x_offset = fmt.i_y_offset = 0;
    p_spu->p_region = p_spu->pf_create_region( VLC_OBJECT(p_dec), &fmt );
    if( !p_spu->p_region )
    {
        msg_Err( p_dec, "cannot allocate SPU region" );
        free( psz_subtitle );
        p_dec->pf_spu_buffer_del( p_dec, p_spu );
        return 0;
    }

    p_spu->p_region->psz_text = psz_subtitle;
    p_spu->i_start = p_block->i_pts;
    p_spu->i_stop = p_block->i_pts + p_block->i_length;
    p_spu->b_ephemer = (p_block->i_length == 0);
    p_spu->b_absolute = VLC_FALSE;

    p_spu->i_flags = OSD_ALIGN_BOTTOM | p_sys->i_align;
    p_spu->i_x = i_align_h;
    p_spu->i_y = i_align_v;

    return p_spu;
}

static void StripTags( char *psz_text )
{
    int i_left_moves = 0;
    vlc_bool_t b_inside_tag = VLC_FALSE;
    int i = 0;
    int i_tag_start = -1;
    while( psz_text[ i ] )
    {
        if( !b_inside_tag )
        {
            if( psz_text[ i ] == '<' )
            {
                b_inside_tag = VLC_TRUE;
                i_tag_start = i;
            }
            psz_text[ i - i_left_moves ] = psz_text[ i ];
        }
        else
        {
            if( ( psz_text[ i ] == ' ' ) ||
                ( psz_text[ i ] == '\t' ) ||
                ( psz_text[ i ] == '\n' ) ||
                ( psz_text[ i ] == '\r' ) )
            {
                b_inside_tag = VLC_FALSE;
                i_tag_start = -1;
            }
            else if( psz_text[ i ] == '>' )
            {
                i_left_moves += i - i_tag_start + 1;
                i_tag_start = -1;
                b_inside_tag = VLC_FALSE;
            }
            else
            {
                psz_text[ i - i_left_moves ] = psz_text[ i ];
            }
        }
        i++;
    }
    psz_text[ i - i_left_moves ] = '\0';
}