Commit 456ad00b, authored by Hugo Beauzée-Luyssen, committed by Jean-Baptiste Kempf

subsdec: Refactor & robustify tags parsing

parent 5583581f
...@@ -733,6 +733,39 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_ ...@@ -733,6 +733,39 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_
return psz_attribute_name; return psz_attribute_name;
} }
// Returns the next tag name and consumes the string up to just after that
// name, or returns NULL and doesn't advance if the angle bracket was not a
// tag opening (NULL is also returned, without advancing, if the allocation
// of the tag name fails).
// For instance, if *ppsz_subtitle == "<some_tag attribute=value>"
// GetTag will return "some_tag", and *ppsz_subtitle will point right after
// "some_tag", i.e. at the space preceding "attribute".
// When b_closing is true, a single '/' directly following the '<' is skipped,
// so "</font>" yields "font"; when it is false, "</font>" is rejected.
// Spaces between the '<' (or "</") and the tag name are ignored.
// A tag name starts with a letter and continues with letters, digits or '_'.
// The returned value must be freed.
static char* GetTag( const char** ppsz_subtitle, bool b_closing )
{
    const char* psz_subtitle = *ppsz_subtitle;
    if ( *psz_subtitle != '<' )
        return NULL;
    // Skip the '<'
    psz_subtitle++;
    if ( b_closing && *psz_subtitle == '/' )
        psz_subtitle++;
    // Skip potential spaces
    while ( *psz_subtitle == ' ' )
        psz_subtitle++;
    // Now we need to verify if what comes next is a valid tag.
    // The <ctype.h> classifiers require a value representable as an
    // unsigned char (or EOF): subtitle text may contain bytes >= 0x80, which
    // are negative when plain char is signed, hence the explicit conversions.
    if ( !isalpha( (unsigned char)*psz_subtitle ) )
        return NULL;
    size_t tag_size = 1;
    while ( isalnum( (unsigned char)psz_subtitle[tag_size] ) ||
            psz_subtitle[tag_size] == '_' )
        tag_size++;
    char* psz_tagname = malloc( tag_size + 1 );
    if ( psz_tagname == NULL )
        return NULL;
    // The length is already known and contains no NUL byte: a plain copy
    // followed by explicit termination is sufficient.
    memcpy( psz_tagname, psz_subtitle, tag_size );
    psz_tagname[tag_size] = '\0';
    // Only commit the new position once the tag has been fully extracted
    *ppsz_subtitle = psz_subtitle + tag_size;
    return psz_tagname;
}
static int GetColor( const char* psz_color ) static int GetColor( const char* psz_color )
{ {
if ( *psz_color == '#' ) if ( *psz_color == '#' )
...@@ -824,6 +857,7 @@ static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_ ...@@ -824,6 +857,7 @@ static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_
} }
static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
{ {
text_segment_t* p_segment; text_segment_t* p_segment;
...@@ -843,40 +877,37 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) ...@@ -843,40 +877,37 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
{ {
if( *psz_subtitle == '<' ) if( *psz_subtitle == '<' )
{ {
if( !strncasecmp( psz_subtitle, "<br/>", 5 )) char *psz_tagname = GetTag( &psz_subtitle, false );
if ( psz_tagname != NULL )
{
if( !strcasecmp( psz_tagname, "br" ) )
{ {
if ( !AppendCharacter( p_segment, '\n' ) ) if ( !AppendCharacter( p_segment, '\n' ) )
goto fail; goto fail;
psz_subtitle += strlen( "<br/>" );
} }
else if( !strncasecmp( psz_subtitle, "<b>", 3 ) ) else if( !strcasecmp( psz_tagname, "b" ) )
{ {
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_BOLD; p_segment->style->i_style_flags |= STYLE_BOLD;
psz_subtitle += strlen( "<b>" );
} }
else if( !strncasecmp( psz_subtitle, "<i>", 3 ) ) else if( !strcasecmp( psz_tagname, "i" ) )
{ {
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_ITALIC; p_segment->style->i_style_flags |= STYLE_ITALIC;
psz_subtitle += strlen( "<i>" );
} }
else if( !strncasecmp( psz_subtitle, "<u>", 3 ) ) else if( !strcasecmp( psz_tagname, "u" ) )
{ {
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_UNDERLINE; p_segment->style->i_style_flags |= STYLE_UNDERLINE;
psz_subtitle += strlen( "<u>" );
} }
else if( !strncasecmp( psz_subtitle, "<s>", 3 ) ) else if( !strcasecmp( psz_tagname, "s" ) )
{ {
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
p_segment->style->i_style_flags |= STYLE_STRIKEOUT; p_segment->style->i_style_flags |= STYLE_STRIKEOUT;
psz_subtitle += strlen( "<s>" );
} }
else if( !strncasecmp( psz_subtitle, "<font ", 6 )) else if( !strcasecmp( psz_tagname, "font" ) )
{ {
p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
psz_subtitle += strlen( "<font " );
char* psz_attribute_name; char* psz_attribute_name;
char* psz_attribute_value; char* psz_attribute_value;
...@@ -886,6 +917,7 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) ...@@ -886,6 +917,7 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
if ( !strcasecmp( psz_attribute_name, "face" ) ) if ( !strcasecmp( psz_attribute_name, "face" ) )
{ {
p_segment->style->psz_fontname = psz_attribute_value; p_segment->style->psz_fontname = psz_attribute_value;
// We don't want to free the attribute value since it has become our fontname
psz_attribute_value = NULL; psz_attribute_value = NULL;
} }
else if ( !strcasecmp( psz_attribute_name, "family" ) ) else if ( !strcasecmp( psz_attribute_name, "family" ) )
...@@ -929,40 +961,42 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) ...@@ -929,40 +961,42 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
free( psz_attribute_name ); free( psz_attribute_name );
free( psz_attribute_value ); free( psz_attribute_value );
} }
}
// Skip potential spaces & end tag // Skip potential spaces & end tag
while ( *psz_subtitle && *psz_subtitle != '>' ) while ( *psz_subtitle && *psz_subtitle != '>' )
psz_subtitle++; psz_subtitle++;
if ( *psz_subtitle == '>' ) if ( *psz_subtitle == '>' )
psz_subtitle++; psz_subtitle++;
free( psz_tagname );
} }
else if( !strncmp( psz_subtitle, "</", 2 )) else if( !strncmp( psz_subtitle, "</", 2 ))
{ {
size_t tag_length = 0; char* psz_tagname = GetTag( &psz_subtitle, true );
psz_subtitle += 2; if ( psz_tagname != NULL )
const char* p_old_pos = psz_subtitle;
while ( *psz_subtitle && *psz_subtitle != '>' )
{ {
tag_length++; if ( !strcasecmp( psz_tagname, "b" ) ||
psz_subtitle++; !strcasecmp( psz_tagname, "i" ) ||
} !strcasecmp( psz_tagname, "u" ) ||
if ( !strncasecmp( p_old_pos, "b", tag_length ) || !strcasecmp( psz_tagname, "s" ) ||
!strncasecmp( p_old_pos, "i", tag_length ) || !strcasecmp( psz_tagname, "font" ) )
!strncasecmp( p_old_pos, "u", tag_length ) ||
!strncasecmp( p_old_pos, "s", tag_length ) ||
!strncasecmp( p_old_pos, "font", tag_length ) )
{ {
// A closing tag for one of the tags we handle, meaning // A closing tag for one of the tags we handle, meaning
// we pushed a style onto the stack earlier // we pushed a style onto the stack earlier
p_segment = NewTextSegmentPopStyle( p_segment, &p_stack ); p_segment = NewTextSegmentPopStyle( p_segment, &p_stack );
// Also skip the '>'
psz_subtitle++;
} }
else else
{ {
// Unknown closing tag, just append the "</", and go on. // Unknown closing tag, just append the "</", and go on.
// This will make the unknown tag appear as text // This will make the unknown tag appear as text
AppendString( p_segment, "</" ); AppendString( p_segment, "</" );
psz_subtitle = p_old_pos + 2; AppendString( p_segment, psz_tagname );
}
while ( *psz_subtitle == ' ' )
psz_subtitle++;
if ( *psz_subtitle == '>' )
psz_subtitle++;
free( psz_tagname );
} }
} }
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment