Commit 456ad00b, authored by Hugo Beauzée-Luyssen, committed by Jean-Baptiste Kempf

subsdec: Refactor & robustify tags parsing

parent 5583581f
......@@ -733,6 +733,39 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_
return psz_attribute_name;
}
// Extracts the tag name that follows an opening angle bracket.
//
// *ppsz_subtitle must point at a '<'. The '<', an optional '/' (only when
// b_closing is true) and any following spaces are skipped; what comes next
// must be a letter followed by any number of letters, digits or underscores.
//
// On success, returns a newly allocated, NUL-terminated copy of the tag name
// and advances *ppsz_subtitle to the first character after the name.
// For instance, if *ppsz_subtitle == "<some_tag attribute=value>", GetTag
// returns "some_tag" and leaves *ppsz_subtitle on the space that precedes
// "attribute". The returned value must be freed by the caller.
//
// Returns NULL and leaves *ppsz_subtitle untouched if the angle bracket does
// not open a tag, or if the allocation fails.
static char* GetTag( const char** ppsz_subtitle, bool b_closing )
{
    const char* psz_subtitle = *ppsz_subtitle;
    if ( *psz_subtitle != '<' )
        return NULL;
    // Skip the '<'
    psz_subtitle++;
    if ( b_closing && *psz_subtitle == '/' )
        psz_subtitle++;
    // Skip potential spaces
    while ( *psz_subtitle == ' ' )
        psz_subtitle++;
    // Now we need to verify if what comes next is a valid tag.
    // The <ctype.h> classifiers have undefined behaviour for negative
    // arguments, and subtitle text may hold bytes >= 0x80 (negative when
    // char is signed), hence the casts to unsigned char.
    if ( !isalpha( (unsigned char)*psz_subtitle ) )
        return NULL;
    size_t tag_size = 1;
    while ( isalnum( (unsigned char)psz_subtitle[tag_size] ) ||
            psz_subtitle[tag_size] == '_' )
        tag_size++;
    char* psz_tagname = malloc( tag_size + 1 );
    if ( !psz_tagname )
        return NULL;
    // The length is known exactly, so a plain memcpy plus an explicit
    // terminator is enough.
    memcpy( psz_tagname, psz_subtitle, tag_size );
    psz_tagname[tag_size] = '\0';
    // Only commit the new position once nothing can fail anymore
    *ppsz_subtitle = psz_subtitle + tag_size;
    return psz_tagname;
}
static int GetColor( const char* psz_color )
{
if ( *psz_color == '#' )
......@@ -824,6 +857,7 @@ static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_
}
static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
{
text_segment_t* p_segment;
......@@ -843,40 +877,37 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
{
if( *psz_subtitle == '<' )
{
if( !strncasecmp( psz_subtitle, "<br/>", 5 ))
char *psz_tagname = GetTag( &psz_subtitle, false );
if ( psz_tagname != NULL )
{
if( !strcasecmp( psz_tagname, "br" ) )
{
if ( !AppendCharacter( p_segment, '\n' ) )
goto fail;
psz_subtitle += strlen( "<br/>" );
}
else if( !strncasecmp( psz_subtitle, "<b>", 3 ) )
else if( !strcasecmp( psz_tagname, "b" ) )
{
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_BOLD;
psz_subtitle += strlen( "<b>" );
}
else if( !strncasecmp( psz_subtitle, "<i>", 3 ) )
else if( !strcasecmp( psz_tagname, "i" ) )
{
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_ITALIC;
psz_subtitle += strlen( "<i>" );
}
else if( !strncasecmp( psz_subtitle, "<u>", 3 ) )
else if( !strcasecmp( psz_tagname, "u" ) )
{
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_UNDERLINE;
psz_subtitle += strlen( "<u>" );
}
else if( !strncasecmp( psz_subtitle, "<s>", 3 ) )
else if( !strcasecmp( psz_tagname, "s" ) )
{
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_STRIKEOUT;
psz_subtitle += strlen( "<s>" );
}
else if( !strncasecmp( psz_subtitle, "<font ", 6 ))
else if( !strcasecmp( psz_tagname, "font" ) )
{
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
psz_subtitle += strlen( "<font " );
char* psz_attribute_name;
char* psz_attribute_value;
......@@ -886,6 +917,7 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
if ( !strcasecmp( psz_attribute_name, "face" ) )
{
p_segment->style->psz_fontname = psz_attribute_value;
// We don't want to free the attribute value since it has become our fontname
psz_attribute_value = NULL;
}
else if ( !strcasecmp( psz_attribute_name, "family" ) )
......@@ -929,40 +961,42 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
free( psz_attribute_name );
free( psz_attribute_value );
}
}
// Skip potential spaces & end tag
while ( *psz_subtitle && *psz_subtitle != '>' )
psz_subtitle++;
if ( *psz_subtitle == '>' )
psz_subtitle++;
free( psz_tagname );
}
else if( !strncmp( psz_subtitle, "</", 2 ))
{
size_t tag_length = 0;
psz_subtitle += 2;
const char* p_old_pos = psz_subtitle;
while ( *psz_subtitle && *psz_subtitle != '>' )
char* psz_tagname = GetTag( &psz_subtitle, true );
if ( psz_tagname != NULL )
{
tag_length++;
psz_subtitle++;
}
if ( !strncasecmp( p_old_pos, "b", tag_length ) ||
!strncasecmp( p_old_pos, "i", tag_length ) ||
!strncasecmp( p_old_pos, "u", tag_length ) ||
!strncasecmp( p_old_pos, "s", tag_length ) ||
!strncasecmp( p_old_pos, "font", tag_length ) )
if ( !strcasecmp( psz_tagname, "b" ) ||
!strcasecmp( psz_tagname, "i" ) ||
!strcasecmp( psz_tagname, "u" ) ||
!strcasecmp( psz_tagname, "s" ) ||
!strcasecmp( psz_tagname, "font" ) )
{
// A closing tag for one of the tags we handle, meaning
// we pushed a style onto the stack earlier
p_segment = NewTextSegmentPopStyle( p_segment, &p_stack );
// Also skip the '>'
psz_subtitle++;
}
else
{
// Unknown closing tag, just append the "</", and go on.
// This will make the unknown tag appear as text
AppendString( p_segment, "</" );
psz_subtitle = p_old_pos + 2;
AppendString( p_segment, psz_tagname );
}
while ( *psz_subtitle == ' ' )
psz_subtitle++;
if ( *psz_subtitle == '>' )
psz_subtitle++;
free( psz_tagname );
}
}
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment