Commit 08f85dbc, authored by Hugo Beauzée-Luyssen, committed by Jean-Baptiste Kempf

subsdec: Use text_segment instead of HTML when parsing subtitles

parent f2e43bd7
...@@ -33,11 +33,13 @@ ...@@ -33,11 +33,13 @@
#include <limits.h> #include <limits.h>
#include <errno.h> #include <errno.h>
#include <ctype.h>
#include <vlc_common.h> #include <vlc_common.h>
#include <vlc_plugin.h> #include <vlc_plugin.h>
#include <vlc_codec.h> #include <vlc_codec.h>
#include <vlc_charset.h> #include <vlc_charset.h>
#include <vlc_xml.h>
#include "substext.h" #include "substext.h"
...@@ -160,6 +162,181 @@ static const char *const ppsz_encoding_names[] = { ...@@ -160,6 +162,181 @@ static const char *const ppsz_encoding_names[] = {
N_("Vietnamese (Windows-1258)"), N_("Vietnamese (Windows-1258)"),
}; };
/*
 * HTML/CSS colour keywords understood in <font> colour attributes.
 *
 * Each entry maps a colour name to its 0xRRGGBB value. The table is
 * terminated by a { NULL, 0 } sentinel and is scanned linearly with a
 * case-insensitive comparison, returning the first match. Because of that,
 * the names repeated in the "Common ones" section are harmless: the earlier
 * "Official html colors" entry always wins and carries the same value.
 *
 * MediumPurple and PaleVioletRed use the values from the CSS extended colour
 * keyword list (#9370DB and #DB7093).
 */
static const struct {
    const char *psz_name;   /* colour keyword, matched case-insensitively */
    uint32_t    i_value;    /* colour as 0xRRGGBB */
} p_html_colors[] = {
    /* Official html colors */
    { "Aqua", 0x00FFFF },
    { "Black", 0x000000 },
    { "Blue", 0x0000FF },
    { "Fuchsia", 0xFF00FF },
    { "Gray", 0x808080 },
    { "Green", 0x008000 },
    { "Lime", 0x00FF00 },
    { "Maroon", 0x800000 },
    { "Navy", 0x000080 },
    { "Olive", 0x808000 },
    { "Purple", 0x800080 },
    { "Red", 0xFF0000 },
    { "Silver", 0xC0C0C0 },
    { "Teal", 0x008080 },
    { "White", 0xFFFFFF },
    { "Yellow", 0xFFFF00 },
    /* Common ones */
    { "AliceBlue", 0xF0F8FF },
    { "AntiqueWhite", 0xFAEBD7 },
    { "Aqua", 0x00FFFF },
    { "Aquamarine", 0x7FFFD4 },
    { "Azure", 0xF0FFFF },
    { "Beige", 0xF5F5DC },
    { "Bisque", 0xFFE4C4 },
    { "Black", 0x000000 },
    { "BlanchedAlmond", 0xFFEBCD },
    { "Blue", 0x0000FF },
    { "BlueViolet", 0x8A2BE2 },
    { "Brown", 0xA52A2A },
    { "BurlyWood", 0xDEB887 },
    { "CadetBlue", 0x5F9EA0 },
    { "Chartreuse", 0x7FFF00 },
    { "Chocolate", 0xD2691E },
    { "Coral", 0xFF7F50 },
    { "CornflowerBlue", 0x6495ED },
    { "Cornsilk", 0xFFF8DC },
    { "Crimson", 0xDC143C },
    { "Cyan", 0x00FFFF },
    { "DarkBlue", 0x00008B },
    { "DarkCyan", 0x008B8B },
    { "DarkGoldenRod", 0xB8860B },
    { "DarkGray", 0xA9A9A9 },
    { "DarkGrey", 0xA9A9A9 },
    { "DarkGreen", 0x006400 },
    { "DarkKhaki", 0xBDB76B },
    { "DarkMagenta", 0x8B008B },
    { "DarkOliveGreen", 0x556B2F },
    { "Darkorange", 0xFF8C00 },
    { "DarkOrchid", 0x9932CC },
    { "DarkRed", 0x8B0000 },
    { "DarkSalmon", 0xE9967A },
    { "DarkSeaGreen", 0x8FBC8F },
    { "DarkSlateBlue", 0x483D8B },
    { "DarkSlateGray", 0x2F4F4F },
    { "DarkSlateGrey", 0x2F4F4F },
    { "DarkTurquoise", 0x00CED1 },
    { "DarkViolet", 0x9400D3 },
    { "DeepPink", 0xFF1493 },
    { "DeepSkyBlue", 0x00BFFF },
    { "DimGray", 0x696969 },
    { "DimGrey", 0x696969 },
    { "DodgerBlue", 0x1E90FF },
    { "FireBrick", 0xB22222 },
    { "FloralWhite", 0xFFFAF0 },
    { "ForestGreen", 0x228B22 },
    { "Fuchsia", 0xFF00FF },
    { "Gainsboro", 0xDCDCDC },
    { "GhostWhite", 0xF8F8FF },
    { "Gold", 0xFFD700 },
    { "GoldenRod", 0xDAA520 },
    { "Gray", 0x808080 },
    { "Grey", 0x808080 },
    { "Green", 0x008000 },
    { "GreenYellow", 0xADFF2F },
    { "HoneyDew", 0xF0FFF0 },
    { "HotPink", 0xFF69B4 },
    { "IndianRed", 0xCD5C5C },
    { "Indigo", 0x4B0082 },
    { "Ivory", 0xFFFFF0 },
    { "Khaki", 0xF0E68C },
    { "Lavender", 0xE6E6FA },
    { "LavenderBlush", 0xFFF0F5 },
    { "LawnGreen", 0x7CFC00 },
    { "LemonChiffon", 0xFFFACD },
    { "LightBlue", 0xADD8E6 },
    { "LightCoral", 0xF08080 },
    { "LightCyan", 0xE0FFFF },
    { "LightGoldenRodYellow", 0xFAFAD2 },
    { "LightGray", 0xD3D3D3 },
    { "LightGrey", 0xD3D3D3 },
    { "LightGreen", 0x90EE90 },
    { "LightPink", 0xFFB6C1 },
    { "LightSalmon", 0xFFA07A },
    { "LightSeaGreen", 0x20B2AA },
    { "LightSkyBlue", 0x87CEFA },
    { "LightSlateGray", 0x778899 },
    { "LightSlateGrey", 0x778899 },
    { "LightSteelBlue", 0xB0C4DE },
    { "LightYellow", 0xFFFFE0 },
    { "Lime", 0x00FF00 },
    { "LimeGreen", 0x32CD32 },
    { "Linen", 0xFAF0E6 },
    { "Magenta", 0xFF00FF },
    { "Maroon", 0x800000 },
    { "MediumAquaMarine", 0x66CDAA },
    { "MediumBlue", 0x0000CD },
    { "MediumOrchid", 0xBA55D3 },
    { "MediumPurple", 0x9370DB },
    { "MediumSeaGreen", 0x3CB371 },
    { "MediumSlateBlue", 0x7B68EE },
    { "MediumSpringGreen", 0x00FA9A },
    { "MediumTurquoise", 0x48D1CC },
    { "MediumVioletRed", 0xC71585 },
    { "MidnightBlue", 0x191970 },
    { "MintCream", 0xF5FFFA },
    { "MistyRose", 0xFFE4E1 },
    { "Moccasin", 0xFFE4B5 },
    { "NavajoWhite", 0xFFDEAD },
    { "Navy", 0x000080 },
    { "OldLace", 0xFDF5E6 },
    { "Olive", 0x808000 },
    { "OliveDrab", 0x6B8E23 },
    { "Orange", 0xFFA500 },
    { "OrangeRed", 0xFF4500 },
    { "Orchid", 0xDA70D6 },
    { "PaleGoldenRod", 0xEEE8AA },
    { "PaleGreen", 0x98FB98 },
    { "PaleTurquoise", 0xAFEEEE },
    { "PaleVioletRed", 0xDB7093 },
    { "PapayaWhip", 0xFFEFD5 },
    { "PeachPuff", 0xFFDAB9 },
    { "Peru", 0xCD853F },
    { "Pink", 0xFFC0CB },
    { "Plum", 0xDDA0DD },
    { "PowderBlue", 0xB0E0E6 },
    { "Purple", 0x800080 },
    { "Red", 0xFF0000 },
    { "RosyBrown", 0xBC8F8F },
    { "RoyalBlue", 0x4169E1 },
    { "SaddleBrown", 0x8B4513 },
    { "Salmon", 0xFA8072 },
    { "SandyBrown", 0xF4A460 },
    { "SeaGreen", 0x2E8B57 },
    { "SeaShell", 0xFFF5EE },
    { "Sienna", 0xA0522D },
    { "Silver", 0xC0C0C0 },
    { "SkyBlue", 0x87CEEB },
    { "SlateBlue", 0x6A5ACD },
    { "SlateGray", 0x708090 },
    { "SlateGrey", 0x708090 },
    { "Snow", 0xFFFAFA },
    { "SpringGreen", 0x00FF7F },
    { "SteelBlue", 0x4682B4 },
    { "Tan", 0xD2B48C },
    { "Teal", 0x008080 },
    { "Thistle", 0xD8BFD8 },
    { "Tomato", 0xFF6347 },
    { "Turquoise", 0x40E0D0 },
    { "Violet", 0xEE82EE },
    { "Wheat", 0xF5DEB3 },
    { "White", 0xFFFFFF },
    { "WhiteSmoke", 0xF5F5F5 },
    { "Yellow", 0xFFFF00 },
    { "YellowGreen", 0x9ACD32 },
    /* Sentinel: marks the end of the table */
    { NULL, 0 }
};
static const int pi_justification[] = { 0, 1, 2 }; static const int pi_justification[] = { 0, 1, 2 };
static const char *const ppsz_justification_text[] = { static const char *const ppsz_justification_text[] = {
N_("Center"),N_("Left"),N_("Right")}; N_("Center"),N_("Left"),N_("Right")};
...@@ -209,13 +386,13 @@ struct decoder_sys_t ...@@ -209,13 +386,13 @@ struct decoder_sys_t
vlc_iconv_t iconv_handle; /* handle to iconv instance */ vlc_iconv_t iconv_handle; /* handle to iconv instance */
bool b_autodetect_utf8; bool b_autodetect_utf8;
xml_reader_t* p_reader;
}; };
static subpicture_t *DecodeBlock ( decoder_t *, block_t ** ); static subpicture_t *DecodeBlock ( decoder_t *, block_t ** );
static subpicture_t *ParseText ( decoder_t *, block_t * ); static subpicture_t *ParseText ( decoder_t *, block_t * );
static char *StripTags ( char * ); static text_segment_t *ParseSubtitles(int *pi_align, const char * );
static char *CreateHtmlSubtitle( int *pi_align, char * );
/***************************************************************************** /*****************************************************************************
* OpenDecoder: probe the decoder and return score * OpenDecoder: probe the decoder and return score
...@@ -316,6 +493,13 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -316,6 +493,13 @@ static int OpenDecoder( vlc_object_t *p_this )
p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" ); p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );
p_sys->p_reader = xml_ReaderCreate( p_dec, NULL );
if ( !p_sys->p_reader )
{
free( p_sys );
return VLC_EGENERIC;
}
return VLC_SUCCESS; return VLC_SUCCESS;
} }
...@@ -461,351 +645,306 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) ...@@ -461,351 +645,306 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys; subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;
p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
char* psz_sub = CreateHtmlSubtitle( &p_spu_sys->align, psz_subtitle ); p_spu_sys->p_segments = ParseSubtitles( &p_spu_sys->align, psz_subtitle );
p_spu_sys->p_segments = text_segment_New( psz_sub );
//FIXME: Remove the variable? //FIXME: Remove the variable?
//if( var_InheritBool( p_dec, "subsdec-formatted" ) ) //if( var_InheritBool( p_dec, "subsdec-formatted" ) )
free( psz_sub );
return p_spu; return p_spu;
} }
/* Try to respect any style tags present in the subtitle string. The main static bool AppendCharacter( text_segment_t* p_segment, char c )
* problem here is a lack of adequate specs for the subtitle formats.
* SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhere.
* SAMI has a detailed spec, but extensive rework is needed in the demux
* code to prevent all this style information being excised, as it presently
* does.
* That leaves the others - none of which were (I guess) originally intended
* to be carrying style information. Over time people have used them that way.
* In the absence of specifications from which to work, the tags supported
* have been restricted to the simple set permitted by the USF DTD, ie. :
* Basic: <br>, <i>, <b>, <u>, <s>
* Extended: <font>
* Attributes: face
* family
* size
* color
* outline-color
* shadow-color
* outline-level
* shadow-level
* back-color
* alpha
* There is also the further restriction that the subtitle be well-formed
* as an XML entity, ie. the HTML sentence:
* <b><i>Bold and Italics</b></i>
* doesn't qualify because the tags aren't nested one inside the other.
* <text> tags are automatically added to the output to ensure
* well-formedness.
* If the text doesn't qualify for any reason, a NULL string is
* returned, and the rendering engine will fall back to the
* plain text version of the subtitle.
*/
/* TODO: highly suboptimal, offset should be cached */
static void HtmlNPut( char **ppsz_html, const char *psz_text, int i_max )
{ {
char *psz_html = *ppsz_html; char* tmp;
if( psz_html == NULL ) if ( asprintf( &tmp, "%s%c", p_segment->psz_text, c ) < 0 )
return; return false;
free( p_segment->psz_text );
p_segment->psz_text = tmp;
return true;
}
static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_value )
{
const char* psz_subtitle = *ppsz_subtitle;
char* psz_attribute_name;
while (*psz_subtitle == ' ')
psz_subtitle++;
const size_t i_offset = strlen(psz_html); size_t attr_len = 0;
const size_t i_len = strnlen(psz_text, i_max); char delimiter;
psz_html = realloc( psz_html, i_offset + i_len + 1 ); while ( *psz_subtitle && isalpha( *psz_subtitle ) )
if( psz_html != NULL )
{ {
memcpy( psz_html + i_offset, psz_text, i_len ); psz_subtitle++;
psz_html[i_offset + i_len] = '\0'; attr_len++;
} }
if ( !*psz_subtitle )
return NULL;
psz_attribute_name = malloc( attr_len + 1 );
if ( unlikely( !psz_attribute_name ) )
return NULL;
strncpy( psz_attribute_name, psz_subtitle - attr_len, attr_len );
psz_attribute_name[attr_len] = 0;
// Skip over to the attribute value
while ( *psz_subtitle && *psz_subtitle != '=' )
psz_subtitle++;
// Acknowledge the delimiter, if any
while ( *psz_subtitle && isspace( *psz_subtitle) )
psz_subtitle++;
if ( *psz_subtitle == '\'' || *psz_subtitle == '"' )
delimiter = *psz_subtitle;
else else
free( *ppsz_html ); delimiter = 0;
*ppsz_html = psz_html;
// Skip spaces, just in case
while ( *psz_subtitle && isspace( *psz_subtitle ) )
psz_subtitle++;
attr_len = 0;
while ( *psz_subtitle && ( ( delimiter != 0 && *psz_subtitle != delimiter ) ||
( delimiter == 0 && !isalpha( *psz_subtitle ) ) ) )
{
psz_subtitle++;
attr_len++;
}
if ( !*psz_subtitle || unlikely( !( *psz_attribute_value = malloc( attr_len + 1 ) ) ) )
{
free( psz_attribute_name );
return NULL;
}
strncpy( *psz_attribute_value, psz_subtitle - attr_len, attr_len );
*psz_attribute_value[attr_len] = 0;
*ppsz_subtitle = psz_subtitle;
return psz_attribute_name;
} }
static void HtmlPut( char **ppsz_html, const char *psz_text ) static int GetColor( const char* psz_color )
{ {
HtmlNPut( ppsz_html, psz_text, INT_MAX ); for( int i = 0; p_html_colors[i].psz_name != NULL; i++ )
{
if( !strcasecmp( psz_color, p_html_colors[i].psz_name ) )
{
return p_html_colors[i].i_value;
}
}
return 0;
} }
static void HtmlCopy( char **ppsz_html, char **ppsz_subtitle, const char *psz_text ) /*
* mini style stack implementation
*/
typedef struct style_stack style_stack_t;
struct style_stack
{ {
HtmlPut( ppsz_html, psz_text ); text_style_t* p_style;
*ppsz_subtitle += strlen(psz_text); style_stack_t* p_next;
} };
static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle ) static text_style_t* DuplicateAndPushStyle(style_stack_t** pp_stack)
{ {
char *psz_tag = malloc( 1 ); text_style_t* p_dup = *pp_stack ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_New();
if( psz_tag == NULL ) if ( unlikely( !p_dup ) )
return NULL; return NULL;
style_stack_t* p_entry = malloc( sizeof( *p_entry ) );
char *psz_html = malloc( 1 ); if ( unlikely( !p_entry ) )
if( psz_html == NULL )
{ {
free( psz_tag ); free( p_dup );
return NULL; return NULL;
} }
psz_tag[0] = '\0'; // Give the style ownership to the segment.
psz_html[0] = '\0'; p_entry->p_style = p_dup;
p_entry->p_next = *pp_stack;
*pp_stack = p_entry;
return p_dup;
}
bool b_has_align = false; static void PopStyle(style_stack_t** pp_stack)
{
style_stack_t* p_old = *pp_stack;
*pp_stack = p_old->p_next;
// Don't free the style, it is now owned by the text_segment_t
free( p_old );
}
/*
 * Opens a new styled run: allocates an empty text segment, pushes a fresh
 * style (a copy of the current top of the stack, or a new one when the stack
 * is empty) and attaches that style to the new segment, which is then linked
 * after p_segment.
 *
 * Returns the new segment, or NULL when the segment itself could not be
 * allocated (in which case neither p_segment nor the stack is touched).
 * The result of DuplicateAndPushStyle() is stored as-is, so the segment's
 * style is NULL if that call failed.
 */
static text_segment_t* NewTextSegmentPushStyle( text_segment_t* p_segment, style_stack_t** pp_stack )
{
    text_segment_t* p_fresh = text_segment_New( NULL );
    if ( unlikely( !p_fresh ) )
        return NULL;

    // The pushed style is shared between the stack entry and the segment
    p_fresh->style = DuplicateAndPushStyle( pp_stack );

    // Chain the new segment right behind the current one
    p_segment->p_next = p_fresh;
    return p_fresh;
}
/*
 * Closes the current styled run: allocates an empty text segment, drops the
 * top entry of the style stack and gives the new segment a copy of whatever
 * style is now on top (or a brand new style when the stack is empty). The new
 * segment is linked after p_segment.
 *
 * Returns the new segment, or NULL when the segment could not be allocated
 * (in which case neither p_segment nor the stack is touched). The style
 * allocation result is not checked here, so the segment's style may be NULL.
 */
static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_stack_t** pp_stack )
{
    text_segment_t* p_new = text_segment_New( NULL );
    if ( unlikely( p_new == NULL ) )
        return NULL;
    // PopStyle() dereferences the top entry unconditionally, so an unmatched
    // closing tag in a broken subtitle file must not reach it with an empty
    // stack.
    if ( *pp_stack != NULL )
        PopStyle( pp_stack );
    // We shouldn't have an empty stack since this happens when closing a tag,
    // but better be safe than sorry if (/when) we encounter a broken subtitle file.
    text_style_t* p_dup = *pp_stack ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_New();
    p_new->style = p_dup;
    p_segment->p_next = p_new;
    return p_new;
}
static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
{
text_segment_t* p_segment;
text_segment_t* p_first_segment;
style_stack_t* p_stack = NULL;
//FIXME: Remove initial allocation? Might make the below code more complicated
p_first_segment = p_segment = text_segment_New( "" );
HtmlPut( &psz_html, "<text>" ); bool b_has_align = false;
/* */ /* */
while( *psz_subtitle ) while( *psz_subtitle )
{ {
if( *psz_subtitle == '\n' ) if( *psz_subtitle == '\n' )
{ {
HtmlPut( &psz_html, "<br/>" ); if ( !AppendCharacter( p_segment, '\n' ) )
goto fail;
psz_subtitle++; psz_subtitle++;
} }
else if( *psz_subtitle == '<' ) else if( *psz_subtitle == '<' )
{ {
if( !strncasecmp( psz_subtitle, "<br/>", 5 )) if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
{ {
HtmlCopy( &psz_html, &psz_subtitle, "<br/>" ); if ( !AppendCharacter( p_segment, '\n' ) )
goto fail;
psz_subtitle += strlen( "<br/>" );
} }
else if( !strncasecmp( psz_subtitle, "<b>", 3 ) ) else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
{ {
HtmlCopy( &psz_html, &psz_subtitle, "<b>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "b" ); p_segment->style->i_style_flags |= STYLE_BOLD;
psz_subtitle += strlen( "<b>" );
} }
else if( !strncasecmp( psz_subtitle, "<i>", 3 ) ) else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
{ {
HtmlCopy( &psz_html, &psz_subtitle, "<i>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "i" ); p_segment->style->i_style_flags |= STYLE_ITALIC;
psz_subtitle += strlen( "<i>" );
} }
else if( !strncasecmp( psz_subtitle, "<u>", 3 ) ) else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
{ {
HtmlCopy( &psz_html, &psz_subtitle, "<u>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "u" ); p_segment->style->i_style_flags |= STYLE_UNDERLINE;
psz_subtitle += strlen( "<u>" );
} }
else if( !strncasecmp( psz_subtitle, "<s>", 3 ) ) else if( !strncasecmp( psz_subtitle, "<s>", 3 ) )
{ {
HtmlCopy( &psz_html, &psz_subtitle, "<s>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "s" ); p_segment->style->i_style_flags |= STYLE_STRIKEOUT;
psz_subtitle += strlen( "<s>" );
} }
else if( !strncasecmp( psz_subtitle, "<font ", 6 )) else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
{ {
const char *psz_attribs[] = { "face=", "family=", "size=", p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
"color=", "outline-color=", "shadow-color=", psz_subtitle += strlen( "<font " );
"outline-level=", "shadow-level=", "back-color=",
"alpha=", NULL };
HtmlCopy( &psz_html, &psz_subtitle, "<font " ); char* psz_attribute_name;
HtmlPut( &psz_tag, "f" ); char* psz_attribute_value;
while( *psz_subtitle != '>' ) while( ( psz_attribute_name = ConsumeAttribute( &psz_subtitle, &psz_attribute_value ) ) )
{ {
int k; if ( !strcasecmp( psz_attribute_name, "face" ) )
/* <font color= */
while (*psz_subtitle == ' ')
psz_subtitle++;
for( k=0; psz_attribs[ k ]; k++ )
{ {
int i_len = strlen( psz_attribs[ k ] ); p_segment->style->psz_fontname = psz_attribute_value;
psz_attribute_value = NULL;
if( !strncasecmp( psz_subtitle, psz_attribs[k], i_len ) )
{
/* */
HtmlPut( &psz_html, psz_attribs[k] );
psz_subtitle += i_len;
/* <font color= red */
while (*psz_subtitle == ' ')
psz_subtitle++;
/* */
if( *psz_subtitle == '"' )
{
psz_subtitle++;
i_len = strcspn( psz_subtitle, "\"" );
}
else if( *psz_subtitle == '\'' )
{
psz_subtitle++;
i_len = strcspn( psz_subtitle, "'" );
}
else
{
i_len = strcspn( psz_subtitle, " \t>" );
}
HtmlPut( &psz_html, "\"" );
HtmlNPut( &psz_html, psz_subtitle, i_len );
HtmlPut( &psz_html, "\"" );
psz_subtitle += i_len;
if( *psz_subtitle == '\"' || *psz_subtitle == '\'' )
psz_subtitle++;
break;
}
} }
if( psz_attribs[ k ] == NULL ) else if ( !strcasecmp( psz_attribute_name, "family" ) )
{ {
/* Jump over unrecognised tag */ p_segment->style->psz_monofontname = psz_attribute_value;
int i_len = strcspn( psz_subtitle, "\"" ); psz_attribute_value = NULL;
if( psz_subtitle[i_len] == '\"' )
{
i_len += 1 + strcspn( &psz_subtitle[i_len + 1], "\"" );
if( psz_subtitle[i_len] == '\"' )
i_len++;
}
/* Not a tag, something else we do not understand */
if( i_len == 0 )
psz_subtitle++;
psz_subtitle += i_len;
} }
else if ( !strcasecmp( psz_attribute_name, "size" ) )
HtmlNPut( &psz_html, psz_subtitle, strspn(psz_subtitle, " ") );
}
HtmlPut( &psz_html, ">" );
psz_subtitle++;
}
else if( !strncmp( psz_subtitle, "</", 2 ))
{
bool b_match = false;
bool b_ignore = false;
int i_len = (psz_tag ? strlen(psz_tag) : 0) - 1;
char *psz_lastTag = NULL;
if( i_len >= 0 )
{
psz_lastTag = psz_tag + i_len;
i_len = 0;
switch( *psz_lastTag )
{ {
case 'b': p_segment->style->i_font_size = atoi( psz_attribute_value );
b_match = !strncasecmp( psz_subtitle, "</b>", 4 ); }
i_len = 4; else if ( !strcasecmp( psz_attribute_name, "color" ) )
break; {
case 'i': p_segment->style->i_font_color = GetColor( psz_attribute_value );
b_match = !strncasecmp( psz_subtitle, "</i>", 4 ); }
i_len = 4; else if ( !strcasecmp( psz_attribute_name, "outline-color" ) )
break; {
case 'u': p_segment->style->i_outline_color = GetColor( psz_attribute_value );
b_match = !strncasecmp( psz_subtitle, "</u>", 4 ); }
i_len = 4; else if ( !strcasecmp( psz_attribute_name, "shadow-color" ) )
break; {
case 's': p_segment->style->i_shadow_color = GetColor( psz_attribute_value );
b_match = !strncasecmp( psz_subtitle, "</s>", 4 ); }
i_len = 4; else if ( !strcasecmp( psz_attribute_name, "outline-level" ) )
break; {
case 'f': p_segment->style->i_outline_width = atoi( psz_attribute_value );
b_match = !strncasecmp( psz_subtitle, "</font>", 7 ); }
i_len = 7; else if ( !strcasecmp( psz_attribute_name, "shadow-level" ) )
break; {
case 'I': p_segment->style->i_shadow_width = atoi( psz_attribute_value );
i_len = strcspn( psz_subtitle, ">" ); }
b_match = psz_subtitle[i_len] == '>'; else if ( !strcasecmp( psz_attribute_name, "back-color" ) )
b_ignore = true; {
if( b_match ) p_segment->style->i_background_color = GetColor( psz_attribute_value );
i_len++; }
break; else if ( !strcasecmp( psz_attribute_name, "alpha" ) )
{
p_segment->style->i_font_alpha = atoi( psz_attribute_value );
} }
}
if( !b_match )
{
/* Not well formed -- kill everything */
free( psz_html );
psz_html = NULL;
break;
}
*psz_lastTag = '\0';
if( !b_ignore )
HtmlNPut( &psz_html, psz_subtitle, i_len );
psz_subtitle += i_len; free( psz_attribute_name );
} free( psz_attribute_value );
else if( ( psz_subtitle[1] < 'a' || psz_subtitle[1] > 'z' ) && }
( psz_subtitle[1] < 'A' || psz_subtitle[1] > 'Z' ) ) // Skip potential spaces & end tag
{ while ( *psz_subtitle && *psz_subtitle != '>' )
/* We have a single < */ psz_subtitle++;
HtmlPut( &psz_html, "&lt;" );
psz_subtitle++;
} }
else else if( !strncmp( psz_subtitle, "</", 2 ))
{ {
/* We have an unknown tag or a single < */ size_t tag_length = 0;
const char* p_old_pos = psz_subtitle;
/* Search for the next tag or end of tag or end of string */ while ( *psz_subtitle && *psz_subtitle != '>' )
char *psz_stop = psz_subtitle + 1 + strcspn( &psz_subtitle[1], "<>" );
char *psz_closing = strstr( psz_subtitle, "/>" );
if( psz_closing && psz_closing < psz_stop )
{ {
/* We have a self closed tag, remove it */ tag_length++;
psz_subtitle = &psz_closing[2]; psz_subtitle++;
} }
else if( *psz_stop == '>' ) if ( !strncasecmp( p_old_pos, "b", tag_length ) ||
!strncasecmp( p_old_pos, "i", tag_length ) ||
!strncasecmp( p_old_pos, "u", tag_length ) ||
!strncasecmp( p_old_pos, "s", tag_length ) ||
!strncasecmp( p_old_pos, "font", tag_length ) )
{ {
char psz_match[256]; // A closing tag for one of the tags we handle, meaning
// we pushed a style onto the stack earlier
snprintf( psz_match, sizeof(psz_match), "</%s", &psz_subtitle[1] ); p_segment = NewTextSegmentPopStyle( p_segment, &p_stack );
psz_match[strcspn( psz_match, " \t>" )] = '\0';
if( strstr( psz_subtitle, psz_match ) )
{
/* We have the closing tag, ignore it TODO */
psz_subtitle = &psz_stop[1];
HtmlPut( &psz_tag, "I" );
}
else
{
int i_len = psz_stop + 1 - psz_subtitle;
/* Copy the whole data */
for( ; i_len > 0; i_len--, psz_subtitle++ )
{
if( *psz_subtitle == '<' )
HtmlPut( &psz_html, "&lt;" );
else if( *psz_subtitle == '>' )
HtmlPut( &psz_html, "&gt;" );
else
HtmlNPut( &psz_html, psz_subtitle, 1 );
}
}
} }
else else
{ {
/* We have a single < */ // Unknown closing tag, just append the '<', and go on.
HtmlPut( &psz_html, "&lt;" ); // This will make the unknown tag appear as text
psz_subtitle++; AppendCharacter( p_segment, '<' );
psz_subtitle = p_old_pos + 1;
} }
} }
}
else if( *psz_subtitle == '&' )
{
if( !strncasecmp( psz_subtitle, "&lt;", 4 ))
{
HtmlCopy( &psz_html, &psz_subtitle, "&lt;" );
}
else if( !strncasecmp( psz_subtitle, "&gt;", 4 ))
{
HtmlCopy( &psz_html, &psz_subtitle, "&gt;" );
}
else if( !strncasecmp( psz_subtitle, "&amp;", 5 ))
{
HtmlCopy( &psz_html, &psz_subtitle, "&amp;" );
}
else else
{ {
HtmlPut( &psz_html, "&amp;" ); /* We have an unknown tag, just append it, and move on.
* The rest of the string won't be recognized as a tag, and
* we will ignore unknown closing tag
*/
AppendCharacter( p_segment, '<' );
psz_subtitle++; psz_subtitle++;
} }
} }
else if( *psz_subtitle == '>' )
{
HtmlPut( &psz_html, "&gt;" );
psz_subtitle++;
}
else if( psz_subtitle[0] == '{' && psz_subtitle[1] == '\\' && else if( psz_subtitle[0] == '{' && psz_subtitle[1] == '\\' &&
strchr( psz_subtitle, '}' ) ) strchr( psz_subtitle, '}' ) )
{ {
...@@ -832,18 +971,21 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle ) ...@@ -832,18 +971,21 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
// FIXME: We don't do difference between Y and y, and we should. // FIXME: We don't do difference between Y and y, and we should.
if( psz_subtitle[3] == 'i' ) if( psz_subtitle[3] == 'i' )
{ {
HtmlPut( &psz_html, "<i>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "i" ); p_segment->style->i_style_flags |= STYLE_ITALIC;
psz_subtitle++;
} }
if( psz_subtitle[3] == 'b' ) if( psz_subtitle[3] == 'b' )
{ {
HtmlPut( &psz_html, "<b>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "b" ); p_segment->style->i_style_flags |= STYLE_BOLD;
psz_subtitle++;
} }
if( psz_subtitle[3] == 'u' ) if( psz_subtitle[3] == 'u' )
{ {
HtmlPut( &psz_html, "<u>" ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
HtmlPut( &psz_tag, "u" ); p_segment->style->i_style_flags |= STYLE_UNDERLINE;
psz_subtitle++;
} }
psz_subtitle = strchr( psz_subtitle, '}' ) + 1; psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
} }
...@@ -852,75 +994,17 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle ) ...@@ -852,75 +994,17 @@ static char *CreateHtmlSubtitle( int *pi_align, char *psz_subtitle )
// Hide other {x:y} atrocities, like {c:$bbggrr} or {P:x} // Hide other {x:y} atrocities, like {c:$bbggrr} or {P:x}
psz_subtitle = strchr( psz_subtitle, '}' ) + 1; psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
} }
else if( psz_subtitle[0] == '\\' && psz_subtitle[1] )
{
if( psz_subtitle[1] == 'N' || psz_subtitle[1] == 'n' )
{
HtmlPut( &psz_html, "<br/>" );
psz_subtitle += 2;
}
else if( psz_subtitle[1] == 'h' )
{
/* Non breakable space */
HtmlPut( &psz_html, NO_BREAKING_SPACE );
psz_subtitle += 2;
}
else
{
HtmlPut( &psz_html, "\\" );
psz_subtitle++;
}
}
else else
{ {
HtmlNPut( &psz_html, psz_subtitle, 1 ); //FIXME: Highly inneficient
#if 0 AppendCharacter( p_segment, *psz_subtitle );
if( *psz_html )
{
/* Check for double whitespace */
# error This test does not make sense.
if( ( *psz_html == ' ' || *psz_html == '\t' ) &&
( *(psz_html-1) == ' ' || *(psz_html-1) == '\t' ) )
{
HtmlPut( &psz_html, NO_BREAKING_SPACE );
psz_html--;
}
}
#endif
psz_subtitle++; psz_subtitle++;
} }
} }
while( psz_tag && *psz_tag ) return p_first_segment;
{
/* */
char *psz_last = &psz_tag[strlen(psz_tag)-1];
switch( *psz_last )
{
case 'b':
HtmlPut( &psz_html, "</b>" );
break;
case 'i':
HtmlPut( &psz_html, "</i>" );
break;
case 'u':
HtmlPut( &psz_html, "</u>" );
break;
case 's':
HtmlPut( &psz_html, "</s>" );
break;
case 'f':
HtmlPut( &psz_html, "</font>" );
break;
case 'I':
break;
}
*psz_last = '\0';
}
/* Close not well formed subtitle */
HtmlPut( &psz_html, "</text>" );
free( psz_tag );
return psz_html; fail:
text_segment_ChainDelete( p_first_segment );
return NULL;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment