Commit 886db9fd authored by Tristan Matthews, committed by Jean-Baptiste Kempf

opus: add encoder

This patch adds an encoder to the opus module.
Signed-off-by: Jean-Baptiste Kempf <jb@videolan.org>
parent 90de948a
...@@ -11,6 +11,7 @@ Decoders: ...@@ -11,6 +11,7 @@ Decoders:
Encoder: Encoder:
* Support for MPEG-2 encoding using x262 * Support for MPEG-2 encoding using x262
* Support for HEVC/H.265 encoding using x265 * Support for HEVC/H.265 encoding using x265
* Support for Opus encoding using libopus
Streaming: Streaming:
* WebM streaming, including live sources, compatible with all major browsers * WebM streaming, including live sources, compatible with all major browsers
......
...@@ -2643,7 +2643,7 @@ AM_CONDITIONAL([HAVE_SPEEXDSP], [test "$have_speexdsp" = "yes"]) ...@@ -2643,7 +2643,7 @@ AM_CONDITIONAL([HAVE_SPEEXDSP], [test "$have_speexdsp" = "yes"])
dnl dnl
dnl Opus plugin dnl Opus plugin
dnl dnl
PKG_ENABLE_MODULES_VLC([OPUS], [], [ogg opus], [Opus support], [auto]) PKG_ENABLE_MODULES_VLC([OPUS], [], [ogg opus >= 1.0.3], [Opus support], [auto])
dnl dnl
dnl theora decoder plugin dnl theora decoder plugin
......
...@@ -231,7 +231,7 @@ $Id$ ...@@ -231,7 +231,7 @@ $Id$
* opencv_example: OpenCV example (face identification) * opencv_example: OpenCV example (face identification)
* opencv_wrapper: OpenCV wrapper video filter * opencv_wrapper: OpenCV wrapper video filter
* opensles_android: OpenSL ES audio output for Android * opensles_android: OpenSL ES audio output for Android
* opus: an Opus audio decoder/packetizer using the libopus library * opus: an Opus audio decoder/packetizer/encoder using the libopus library
* os2drive: service discovery for OS/2 drives * os2drive: service discovery for OS/2 drives
* oss: audio output module using the OSS /dev/dsp interface * oss: audio output module using the OSS /dev/dsp interface
* packetizer_copy: Simple copy packetizer * packetizer_copy: Simple copy packetizer
......
...@@ -54,6 +54,10 @@ ...@@ -54,6 +54,10 @@
*****************************************************************************/ *****************************************************************************/
static int OpenDecoder ( vlc_object_t * ); static int OpenDecoder ( vlc_object_t * );
static void CloseDecoder ( vlc_object_t * ); static void CloseDecoder ( vlc_object_t * );
#ifdef ENABLE_SOUT
static int OpenEncoder ( vlc_object_t * );
static void CloseEncoder ( vlc_object_t * );
#endif
vlc_module_begin () vlc_module_begin ()
set_category( CAT_INPUT ) set_category( CAT_INPUT )
...@@ -64,6 +68,14 @@ vlc_module_begin () ...@@ -64,6 +68,14 @@ vlc_module_begin ()
set_shortname( N_("Opus") ) set_shortname( N_("Opus") )
set_callbacks( OpenDecoder, CloseDecoder ) set_callbacks( OpenDecoder, CloseDecoder )
#ifdef ENABLE_SOUT
add_submodule ()
set_description( N_("Opus audio encoder") )
set_capability( "encoder", 150 )
set_shortname( N_("Opus") )
set_callbacks( OpenEncoder, CloseEncoder )
#endif
vlc_module_end () vlc_module_end ()
/***************************************************************************** /*****************************************************************************
...@@ -433,3 +445,238 @@ static void CloseDecoder( vlc_object_t *p_this ) ...@@ -433,3 +445,238 @@ static void CloseDecoder( vlc_object_t *p_this )
free( p_sys ); free( p_sys );
} }
#ifdef ENABLE_SOUT
/* Only ever encode 20 ms at a time. Longer frames do not yield much extra
   compression, while shorter frames lose compression. Frame duration does not
   matter much in Ogg unless low latency is really needed, and that would also
   require muxing one packet per page. */
static const unsigned OPUS_FRAME_SIZE = 960; /* 48000 * 20 / 1000 */
/* Private state of the Opus encoder, allocated by OpenEncoder() and released
   by CloseEncoder(). */
struct encoder_sys_t
{
/* libopus multistream encoder handle. */
OpusMSEncoder *enc;
/* Staging buffer holding one frame of interleaved float samples
   (OPUS_FRAME_SIZE samples per channel). */
float *buffer;
/* Number of samples (per channel) currently stored in buffer. */
unsigned i_nb_samples;
/* Samples received but not yet emitted in an output block; initialised with
   the encoder lookahead and used to derive output timestamps. */
int i_samples_delay;
/* Zero-filled block of lookahead samples fed to the encoder ahead of the
   input; NULL when there is none or once it has been consumed. */
block_t *padding;
/* Stream count taken from the prepared header, used to size output blocks. */
int nb_streams;
};
/**
 * Appends samples from a source block to the encoder staging buffer.
 *
 * \param enc       encoder whose staging buffer receives the samples
 * \param src_start offset into src, counted in floats (not in frames)
 * \param src       block of interleaved float samples; its i_nb_samples is
 *                  decremented by the number of samples taken
 * \param samples   number of samples per channel to copy
 * \return the float offset just past the copied data, for the next call
 *
 * No bounds checking is done: the caller must make sure that the staging
 * buffer has room for the requested samples.
 */
static unsigned fill_buffer(encoder_t *enc, unsigned src_start, block_t *src,
                            unsigned samples)
{
    encoder_sys_t *p_sys = enc->p_sys;
    const unsigned nch = enc->fmt_out.audio.i_channels;
    const unsigned n_floats = samples * nch;

    const float *in = (const float *) src->p_buffer;
    float *out = p_sys->buffer;

    memcpy(out + (p_sys->i_nb_samples * nch), in + src_start,
           n_floats * sizeof(float));

    src->i_nb_samples -= samples;
    p_sys->i_nb_samples += samples;

    return src_start + n_floats;
}
/**
 * Encodes a block of interleaved float samples into Opus packets.
 *
 * Input samples are accumulated in the staging buffer; every time a full
 * frame of OPUS_FRAME_SIZE samples is available it is encoded into its own
 * output block. Samples that do not fill a frame stay in the staging buffer
 * for the next call. The lookahead padding prepared by OpenEncoder(), if any,
 * is fed to the encoder ahead of the input.
 *
 * \param enc encoder instance
 * \param buf input block, or NULL (nothing is produced in that case). The
 *            block is not released here, but its i_nb_samples is consumed.
 * \return a chain of encoded blocks, or NULL if no complete frame was
 *         produced
 */
static block_t *Encode(encoder_t *enc, block_t *buf)
{
    encoder_sys_t *sys = enc->p_sys;

    if (!buf)
        return NULL;

    /* Timestamp of the first sample still pending in the encoder. */
    mtime_t i_pts = buf->i_pts -
                (mtime_t) CLOCK_FREQ * (mtime_t) sys->i_samples_delay /
                (mtime_t) enc->fmt_in.audio.i_rate;

    sys->i_samples_delay += buf->i_nb_samples;

    block_t *result = NULL;
    unsigned src_start = 0;
    unsigned padding_start = 0;
    /* The maximum Opus frame size is 1275 bytes + TOC sequence length. */
    const unsigned OPUS_MAX_ENCODED_BYTES = ((1275 + 3) * sys->nb_streams) - 2;
    const mtime_t i_frame_length = (mtime_t) CLOCK_FREQ *
                (mtime_t) OPUS_FRAME_SIZE / (mtime_t) enc->fmt_in.audio.i_rate;

    while (sys->i_nb_samples + buf->i_nb_samples >= OPUS_FRAME_SIZE)
    {
        block_t *out_block = block_Alloc(OPUS_MAX_ENCODED_BYTES);
        if (!out_block)
        {
            /* Out of memory: the rest of the input cannot be encoded and
             * does not fit in the staging buffer, so drop it and keep the
             * delay accounting in sync with what is really pending. */
            sys->i_samples_delay -= buf->i_nb_samples;
            buf->i_nb_samples = 0;
            break;
        }

        /* add padding to beginning */
        if (sys->padding)
        {
            const size_t leftover_space = OPUS_FRAME_SIZE - sys->i_nb_samples;
            padding_start = fill_buffer(enc, padding_start, sys->padding,
                    __MIN(sys->padding->i_nb_samples, leftover_space));
            if (sys->padding->i_nb_samples <= 0)
            {
                block_Release(sys->padding);
                sys->padding = NULL;
            }
        }

        /* padding may have been freed either before or inside previous
         * if-statement */
        if (!sys->padding)
        {
            const size_t leftover_space = OPUS_FRAME_SIZE - sys->i_nb_samples;
            src_start = fill_buffer(enc, src_start, buf,
                    __MIN(buf->i_nb_samples, leftover_space));
        }

        opus_int32 bytes_encoded = opus_multistream_encode_float(sys->enc,
                sys->buffer, OPUS_FRAME_SIZE, out_block->p_buffer,
                out_block->i_buffer);

        if (bytes_encoded < 0)
        {
            /* The frame could not be encoded: drop it. */
            block_Release(out_block);
        }
        else
        {
            out_block->i_length = i_frame_length;
            out_block->i_dts = out_block->i_pts = i_pts;
            out_block->i_buffer = bytes_encoded;
            block_ChainAppend(&result, out_block);
        }

        /* The staged frame is consumed whether or not encoding succeeded.
         * Leaving the staging buffer full after a failure would keep the
         * loop condition true without consuming any input, i.e. the loop
         * would never terminate. */
        sys->i_samples_delay -= OPUS_FRAME_SIZE;
        i_pts += i_frame_length;
        sys->i_nb_samples = 0;
    }

    /* put leftover samples at beginning of buffer */
    if (buf->i_nb_samples > 0)
        fill_buffer(enc, src_start, buf, buf->i_nb_samples);

    return result;
}
/**
 * Opens the Opus encoder.
 *
 * Forces 48 kHz float input, creates the libopus multistream encoder,
 * allocates the one-frame staging buffer, writes the Opus headers (including
 * the pre-skip obtained from the encoder lookahead) to the output extradata
 * and prepares the zero padding matching that lookahead.
 *
 * \param p_this the encoder object
 * \return VLC_SUCCESS on success, VLC_EGENERIC if the output codec is not
 *         Opus or the libopus encoder cannot be created, VLC_ENOMEM on
 *         allocation or header failure. On failure nothing allocated here is
 *         left behind and enc->p_sys is not set.
 */
static int OpenEncoder(vlc_object_t *p_this)
{
    encoder_t *enc = (encoder_t *)p_this;

    if (enc->fmt_out.i_codec != VLC_CODEC_OPUS)
        return VLC_EGENERIC;

    encoder_sys_t *sys = malloc(sizeof(*sys));
    if (!sys)
        return VLC_ENOMEM;

    int status = VLC_SUCCESS;
    int extra_written = 0; /* set once the headers are in fmt_out.p_extra */

    /* Put every field in a known state before the first possible failure. */
    sys->buffer = NULL;
    sys->enc = NULL;
    sys->padding = NULL;
    sys->i_nb_samples = 0;
    sys->i_samples_delay = 0;
    sys->nb_streams = 0;

    enc->pf_encode_audio = Encode;
    enc->fmt_in.i_codec = VLC_CODEC_FL32;
    enc->fmt_in.audio.i_rate = /* Only 48kHz */
    enc->fmt_out.audio.i_rate = 48000;
    enc->fmt_out.audio.i_channels = enc->fmt_in.audio.i_channels;

    OpusHeader header;

    if (opus_prepare_header(enc->fmt_out.audio.i_channels,
            enc->fmt_out.audio.i_rate,
            &header))
    {
        msg_Err(enc, "Failed to prepare header.");
        status = VLC_ENOMEM;
        goto error;
    }

    /* needed for max encoded size calculation */
    sys->nb_streams = header.nb_streams;

    int err = OPUS_OK;
    sys->enc =
        opus_multistream_surround_encoder_create(enc->fmt_in.audio.i_rate,
                enc->fmt_in.audio.i_channels, header.channel_mapping,
                &header.nb_streams, &header.nb_coupled, header.stream_map,
                OPUS_APPLICATION_AUDIO, &err);

    if (err != OPUS_OK || !sys->enc)
    {
        msg_Err(enc, "Could not create encoder: error %d", err);
        sys->enc = NULL;
        status = VLC_EGENERIC;
        goto error;
    }

    /* TODO: vbr, bitrate, fec */

    /* Buffer for incoming audio, since opus only accepts frame sizes that are
       multiples of 2.5ms */
    sys->buffer = malloc(OPUS_FRAME_SIZE * header.channels * sizeof(float));
    if (!sys->buffer) {
        status = VLC_ENOMEM;
        goto error;
    }

    int ret = opus_multistream_encoder_ctl(sys->enc,
            OPUS_GET_LOOKAHEAD(&sys->i_samples_delay));
    if (ret != OPUS_OK)
        msg_Err(enc, "Unable to get number of lookahead samples: %s\n",
                opus_strerror(ret));

    header.preskip = sys->i_samples_delay;

    /* Now that we have preskip, we can write the header to extradata */
    if (opus_write_header((uint8_t **) &enc->fmt_out.p_extra,
                          &enc->fmt_out.i_extra, &header))
    {
        msg_Err(enc, "Failed to write header.");
        status = VLC_ENOMEM;
        goto error;
    }
    extra_written = 1;

    if (sys->i_samples_delay > 0)
    {
        const unsigned padding_samples = sys->i_samples_delay *
            enc->fmt_out.audio.i_channels;
        sys->padding = block_Alloc(padding_samples * sizeof(float));
        if (!sys->padding) {
            status = VLC_ENOMEM;
            goto error;
        }
        sys->padding->i_nb_samples = sys->i_samples_delay;
        float *pad_ptr = (float *) sys->padding->p_buffer;
        memset(pad_ptr, 0, padding_samples * sizeof(float));
    }

    /* Publish the state only once nothing can fail any more, so that a
     * failed open never leaves enc->p_sys pointing at freed memory. */
    enc->p_sys = sys;
    return status;

error:
    if (extra_written)
    {
        /* Do not leave our headers in the output format of an encoder that
         * failed to open. NOTE(review): assumes the extradata is heap memory
         * releasable with free() - confirm against xiph_AppendHeaders(). */
        free(enc->fmt_out.p_extra);
        enc->fmt_out.p_extra = NULL;
        enc->fmt_out.i_extra = 0;
    }
    if (sys->enc)
        opus_multistream_encoder_destroy(sys->enc);
    free(sys->buffer);
    free(sys);

    return status;
}
/**
 * Closes the Opus encoder and releases everything OpenEncoder() allocated:
 * the libopus encoder, any lookahead padding that was never consumed, the
 * staging buffer and the private state itself.
 *
 * \param p_this the encoder object
 */
static void CloseEncoder(vlc_object_t *p_this)
{
    encoder_sys_t *state = ((encoder_t *)p_this)->p_sys;

    opus_multistream_encoder_destroy(state->enc);

    /* The padding block is only still around if it was never fully fed. */
    if (state->padding != NULL)
        block_Release(state->padding);

    free(state->buffer);
    free(state);
}
#endif /* ENABLE_SOUT */
...@@ -30,8 +30,13 @@ ...@@ -30,8 +30,13 @@
#endif #endif
#include "opus_header.h" #include "opus_header.h"
#include <opus.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <vlc_common.h>
#include "../demux/xiph.h"
/* Header contents: /* Header contents:
- "OpusHead" (64 bits) - "OpusHead" (64 bits)
...@@ -213,6 +218,151 @@ int opus_header_parse(const unsigned char *packet, int len, OpusHeader *h) ...@@ -213,6 +218,151 @@ int opus_header_parse(const unsigned char *packet, int len, OpusHeader *h)
return 1; return 1;
} }
/*
Comments will be stored in the Vorbis style.
It is described in the "Structure" section of
http://www.xiph.org/ogg/vorbis/doc/v-comment.html
However, Opus and other non-vorbis formats omit the "framing_bit".
The comment header is decoded as follows:
1) [vendor_length] = unsigned little endian 32 bits integer
2) [vendor_string] = UTF-8 vector as [vendor_length] octets
3) [user_comment_list_length] = unsigned little endian 32 bits integer
4) iterate [user_comment_list_length] times {
5) [length] = unsigned little endian 32 bits integer
6) this iteration's user comment = UTF-8 vector as [length] octets
}
7) done.
*/
/**
 * Allocates a minimal "OpusTags" comment packet: the magic signature, the
 * vendor string (the libopus version string) and an empty user comment list.
 *
 * \param length set to the size of the returned packet, in bytes
 * \return a heap-allocated packet to be released with free(), or NULL if the
 *         allocation failed (length is left untouched in that case)
 */
static char *comment_init(size_t *length)
{
    /* The 'vendor' field should be the actual encoding library used. */
    const char *vendor = opus_get_version_string();
    int vendor_size = strlen(vendor);

    /* magic (8) + vendor length (4) + vendor + comment count (4) */
    int total = 8 + 4 + vendor_size + 4;

    char *packet = malloc(total);
    if (!packet)
        return NULL;

    char *cursor = packet;
    memcpy(cursor, "OpusTags", 8);
    cursor += 8;
    SetDWLE(cursor, vendor_size);
    cursor += 4;
    memcpy(cursor, vendor, vendor_size);
    cursor += vendor_size;
    SetDWLE(cursor, 0); /* no user comments yet */

    *length = total;
    return packet;
}
/**
 * Appends one user comment to a comment packet and bumps its comment count.
 *
 * The stored comment is the concatenation of tag and val, so the tag is
 * expected to carry its own '=' separator.
 *
 * \param comments in/out pointer to the heap-allocated packet; updated when
 *                 the packet is moved by the reallocation
 * \param length   in/out size of the packet in bytes
 * \param tag      comment tag, or NULL to store val alone
 * \param val      comment value (must not be NULL)
 * \return 0 on success, 1 if the packet could not be enlarged; on failure
 *         *comments and *length are unchanged and still valid
 */
static int comment_add(char **comments, size_t *length, const char *tag,
                       const char *val)
{
    char *packet = *comments;
    const size_t old_len = *length;

    int vendor_length = GetDWLE(packet + 8);
    size_t comment_count = GetDWLE(packet + 8 + 4 + vendor_length);

    size_t tag_len = 0;
    if (tag)
        tag_len = strlen(tag);
    size_t val_len = strlen(val);

    /* room for the 32-bit length prefix plus the comment text */
    size_t new_len = old_len + 4 + tag_len + val_len;

    packet = realloc(packet, new_len);
    if (packet == NULL)
        return 1;

    char *entry = packet + old_len;
    SetDWLE(entry, tag_len + val_len);              /* length of comment */
    if (tag)
        memcpy(entry + 4, tag, tag_len);            /* comment */
    memcpy(entry + 4 + tag_len, val, val_len);      /* comment */

    SetDWLE(packet + 8 + 4 + vendor_length, comment_count + 1);

    *comments = packet;
    *length = new_len;
    return 0;
}
/**
 * Pads a comment packet with zero bytes so that the metadata can later be
 * updated without rewriting the whole file.
 *
 * At least 512 bytes (the opus-tools default) are added, and the total is
 * rounded up to the largest size that fits in the Ogg segments already
 * needed, i.e. one byte short of a multiple of 255.
 *
 * \param comments in/out pointer to the heap-allocated packet; updated when
 *                 the packet is moved by the reallocation
 * \param length   in/out size of the packet in bytes
 * \return 0 on success, 1 if the packet could not be enlarged; on failure
 *         *comments and *length are unchanged and still valid
 */
static int comment_pad(char **comments, size_t *length)
{
    enum { PAD_BYTES = 512, OGG_SEGMENT_BYTES = 255 };

    const size_t old_len = *length;
    const size_t segments = (old_len + PAD_BYTES) / OGG_SEGMENT_BYTES + 1;
    const size_t padded_len = segments * OGG_SEGMENT_BYTES - 1;

    char *grown = realloc(*comments, padded_len);
    if (!grown)
        return 1;

    memset(grown + old_len, 0, padded_len - old_len);

    *length = padded_len;
    *comments = grown;
    return 0;
}
/**
 * Fills an Opus header with default values for the given input format.
 *
 * One uncoupled stream per channel is assumed, the gain is 0 dB, and the
 * channel mapping family is derived from the channel count: 0 for up to two
 * channels, 1 for three to eight channels, 255 above that.
 *
 * \param channels number of audio channels
 * \param rate     input sample rate, in Hz
 * \param header   header to fill in
 * \return always 0
 */
int opus_prepare_header(unsigned channels, unsigned rate, OpusHeader *header)
{
    header->version = 1;
    header->input_sample_rate = rate;
    header->gain = 0; // 0dB

    header->channels = channels;
    header->nb_streams = header->channels;
    header->nb_coupled = 0;

    if (header->channels > 8)
        header->channel_mapping = 255;
    else if (header->channels > 2)
        header->channel_mapping = 1;
    else
        header->channel_mapping = 0;

    return 0;
}
/**
 * Serialises the Opus identification header and a comment header
 * ("ENCODER=VLC media player", plus padding) and appends both to the given
 * extradata through xiph_AppendHeaders().
 *
 * \param p_extra in/out extradata buffer
 * \param i_extra in/out extradata size, in bytes
 * \param header  identification header to serialise
 * \return 0 on success, 1 on failure (header serialisation, allocation or
 *         appending failed); after an append failure *p_extra is NULL and
 *         *i_extra is 0
 */
int opus_write_header(uint8_t **p_extra, int *i_extra, OpusHeader *header)
{
    unsigned char header_data[100];
    const int packet_size = opus_header_to_packet(header, header_data,
                                                  sizeof(header_data));
    /* A header of no size cannot be valid.
     * NOTE(review): presumably opus_header_to_packet() reports failure with
     * a zero return - confirm against its implementation. */
    if (packet_size <= 0)
        return 1;

    ogg_packet headers[2];
    headers[0].packet = header_data;
    headers[0].bytes = packet_size;
    headers[0].b_o_s = 1;
    headers[0].e_o_s = 0;
    headers[0].granulepos = 0;
    headers[0].packetno = 0;

    size_t comments_length;
    char *comments = comment_init(&comments_length);

    if (!comments)
        return 1;
    if (comment_add(&comments, &comments_length, "ENCODER=",
                    "VLC media player"))
    {
        free(comments);
        return 1;
    }

    if (comment_pad(&comments, &comments_length))
    {
        free(comments);
        return 1;
    }

    headers[1].packet = (unsigned char *) comments;
    headers[1].bytes = comments_length;
    headers[1].b_o_s = 0;
    headers[1].e_o_s = 0;
    headers[1].granulepos = 0;
    headers[1].packetno = 1;

    int failed = 0;
    for (unsigned i = 0; i < ARRAY_SIZE(headers); ++i)
    {
        if (xiph_AppendHeaders(i_extra, (void **) p_extra,
                               headers[i].bytes, headers[i].packet))
        {
            /* Stop at the first failure: appending the next packet to the
             * reset extradata would produce a header set that lacks its
             * first packet. */
            *i_extra = 0;
            *p_extra = NULL;
            failed = 1;
            break;
        }
    }

    /* The first packet handed to xiph_AppendHeaders() lives on this
     * function's stack, so the callee cannot be keeping the pointers it is
     * given: the comment packet is still ours to release. */
    free(comments);

    return failed;
}
int opus_header_to_packet(const OpusHeader *h, unsigned char *packet, int len) int opus_header_to_packet(const OpusHeader *h, unsigned char *packet, int len)
{ {
Packet p; Packet p;
......
...@@ -45,5 +45,7 @@ typedef struct { ...@@ -45,5 +45,7 @@ typedef struct {
int opus_header_parse(const unsigned char *header, int len, OpusHeader *h); int opus_header_parse(const unsigned char *header, int len, OpusHeader *h);
int opus_header_to_packet(const OpusHeader *h, unsigned char *packet, int len); int opus_header_to_packet(const OpusHeader *h, unsigned char *packet, int len);
int opus_prepare_header(unsigned channels, unsigned rate, OpusHeader *header);
int opus_write_header(uint8_t **p_extra, int *i_extra, OpusHeader *header);
#endif #endif
...@@ -361,6 +361,10 @@ static int AddStream( sout_mux_t *p_mux, sout_input_t *p_input ) ...@@ -361,6 +361,10 @@ static int AddStream( sout_mux_t *p_mux, sout_input_t *p_input )
case AUDIO_ES: case AUDIO_ES:
switch( p_stream->i_fourcc ) switch( p_stream->i_fourcc )
{ {
case VLC_CODEC_OPUS:
msg_Dbg( p_mux, "opus stream" );
break;
case VLC_CODEC_VORBIS: case VLC_CODEC_VORBIS:
msg_Dbg( p_mux, "vorbis stream" ); msg_Dbg( p_mux, "vorbis stream" );
break; break;
...@@ -626,6 +630,7 @@ static block_t *OggCreateHeader( sout_mux_t *p_mux ) ...@@ -626,6 +630,7 @@ static block_t *OggCreateHeader( sout_mux_t *p_mux )
if( p_stream->i_fourcc == VLC_CODEC_VORBIS || if( p_stream->i_fourcc == VLC_CODEC_VORBIS ||
p_stream->i_fourcc == VLC_CODEC_SPEEX || p_stream->i_fourcc == VLC_CODEC_SPEEX ||
p_stream->i_fourcc == VLC_CODEC_OPUS ||
p_stream->i_fourcc == VLC_CODEC_THEORA ) p_stream->i_fourcc == VLC_CODEC_THEORA )
{ {
/* First packet in order: vorbis/speex/theora info */ /* First packet in order: vorbis/speex/theora info */
...@@ -713,6 +718,7 @@ static block_t *OggCreateHeader( sout_mux_t *p_mux ) ...@@ -713,6 +718,7 @@ static block_t *OggCreateHeader( sout_mux_t *p_mux )
if( p_stream->i_fourcc == VLC_CODEC_VORBIS || if( p_stream->i_fourcc == VLC_CODEC_VORBIS ||
p_stream->i_fourcc == VLC_CODEC_SPEEX || p_stream->i_fourcc == VLC_CODEC_SPEEX ||
p_stream->i_fourcc == VLC_CODEC_OPUS ||
p_stream->i_fourcc == VLC_CODEC_THEORA ) p_stream->i_fourcc == VLC_CODEC_THEORA )
{ {
unsigned pi_size[XIPH_MAX_HEADER_COUNT]; unsigned pi_size[XIPH_MAX_HEADER_COUNT];
...@@ -977,6 +983,7 @@ static int MuxBlock( sout_mux_t *p_mux, sout_input_t *p_input ) ...@@ -977,6 +983,7 @@ static int MuxBlock( sout_mux_t *p_mux, sout_input_t *p_input )
if( p_stream->i_fourcc != VLC_CODEC_VORBIS && if( p_stream->i_fourcc != VLC_CODEC_VORBIS &&
p_stream->i_fourcc != VLC_CODEC_FLAC && p_stream->i_fourcc != VLC_CODEC_FLAC &&
p_stream->i_fourcc != VLC_CODEC_SPEEX && p_stream->i_fourcc != VLC_CODEC_SPEEX &&
p_stream->i_fourcc != VLC_CODEC_OPUS &&
p_stream->i_fourcc != VLC_CODEC_THEORA && p_stream->i_fourcc != VLC_CODEC_THEORA &&
p_stream->i_fourcc != VLC_CODEC_DIRAC ) p_stream->i_fourcc != VLC_CODEC_DIRAC )
{ {
...@@ -995,6 +1002,7 @@ static int MuxBlock( sout_mux_t *p_mux, sout_input_t *p_input ) ...@@ -995,6 +1002,7 @@ static int MuxBlock( sout_mux_t *p_mux, sout_input_t *p_input )
{ {
if( p_stream->i_fourcc == VLC_CODEC_VORBIS || if( p_stream->i_fourcc == VLC_CODEC_VORBIS ||
p_stream->i_fourcc == VLC_CODEC_FLAC || p_stream->i_fourcc == VLC_CODEC_FLAC ||
p_stream->i_fourcc == VLC_CODEC_OPUS ||
p_stream->i_fourcc == VLC_CODEC_SPEEX ) p_stream->i_fourcc == VLC_CODEC_SPEEX )
{ {
/* number of sample from begining + current packet */ /* number of sample from begining + current packet */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment