#endif
#include "compat.h"
#include "wav_reader.h"
+#include "caf_reader.h"
#include "aacenc.h"
#include "m4af.h"
#include "progress.h"
" 0: Off\n"
" 1: On(default)\n"
" -L, --lowdelay-sbr Enable ELD-SBR (AAC ELD only)\n"
-" -s, --sbr-signaling <n> SBR signaling mode\n"
-" 0: Implicit, backward compatible(default)\n"
-" 1: Explicit SBR and implicit PS\n"
-" 2: Explicit hierarchical signaling\n"
" -f, --transport-format <n> Transport format\n"
" 0: RAW (default, muxed into M4A)\n"
" 1: ADIF\n"
" transport layer\n"
"\n"
" -o <filename> Output filename\n"
-" --ignorelength Ignore length of WAV header\n"
+" -G, --gapless-mode <n> Encoder delay signaling for gapless playback\n"
+" 0: iTunSMPB (default)\n"
+" 1: ISO standard (edts + sgpd)\n"
+" 2: Both\n"
+" --include-sbr-delay Count SBR decoder delay in encoder delay\n"
+" This is not iTunes compatible, but is default\n"
+" behavior of FDK library.\n"
+" -I, --ignorelength Ignore length of WAV header\n"
" -S, --silent Don't print progress messages\n"
+" --moov-before-mdat Place moov box before mdat box on m4a output\n"
"\n"
"Options for raw (headerless) input:\n"
" -R, --raw Treat input as raw (by default WAV is\n"
AACENC_PARAMS
char *input_filename;
+ FILE *input_fp;
char *output_filename;
+ FILE *output_fp;
+ unsigned gapless_mode;
+ unsigned include_sbr_delay;
unsigned ignore_length;
int silent;
+ int moov_before_mdat;
int is_raw;
unsigned raw_channels;
unsigned raw_rate;
const char *raw_format;
- aacenc_tag_param_t tags;
+ aacenc_tag_store_t tags;
+ aacenc_tag_store_t source_tags;
+ aacenc_translate_generic_text_tag_ctx_t source_tag_ctx;
char *json_filename;
} aacenc_param_ex_t;
int ch;
unsigned n;
+#define OPT_INCLUDE_SBR_DELAY M4AF_FOURCC('s','d','l','y')
+#define OPT_MOOV_BEFORE_MDAT M4AF_FOURCC('m','o','o','v')
#define OPT_RAW_CHANNELS M4AF_FOURCC('r','c','h','n')
#define OPT_RAW_RATE M4AF_FOURCC('r','r','a','t')
#define OPT_RAW_FORMAT M4AF_FOURCC('r','f','m','t')
{ "bandwidth", required_argument, 0, 'w' },
{ "afterburner", required_argument, 0, 'a' },
{ "lowdelay-sbr", no_argument, 0, 'L' },
- { "sbr-signaling", required_argument, 0, 's' },
{ "transport-format", required_argument, 0, 'f' },
{ "adts-crc-check", no_argument, 0, 'C' },
{ "header-period", required_argument, 0, 'P' },
+ { "gapless-mode", required_argument, 0, 'G' },
+ { "include-sbr-delay", no_argument, 0, OPT_INCLUDE_SBR_DELAY },
{ "ignorelength", no_argument, 0, 'I' },
{ "silent", no_argument, 0, 'S' },
+ { "moov-before-mdat", no_argument, 0, OPT_MOOV_BEFORE_MDAT },
{ "raw", no_argument, 0, 'R' },
{ "raw-channels", required_argument, 0, OPT_RAW_CHANNELS },
params->afterburner = 1;
aacenc_getmainargs(&argc, &argv);
- while ((ch = getopt_long(argc, argv, "hp:b:m:w:a:Ls:f:CP:Io:SR",
+ while ((ch = getopt_long(argc, argv, "hp:b:m:w:a:Ls:f:CP:G:Io:SR",
long_options, 0)) != EOF) {
switch (ch) {
case 'h':
case 'L':
params->lowdelay_sbr = 1;
break;
- case 's':
- if (sscanf(optarg, "%u", &n) != 1 || n > 2) {
- fprintf(stderr, "invalid arg for sbr-signaling\n");
- return -1;
- }
- params->sbr_signaling = n;
- break;
case 'f':
if (sscanf(optarg, "%u", &n) != 1) {
fprintf(stderr, "invalid arg for transport-format\n");
case 'o':
params->output_filename = optarg;
break;
+ case 'G':
+ if (sscanf(optarg, "%u", &n) != 1 || n > 2) {
+ fprintf(stderr, "invalid arg for gapless-mode\n");
+ return -1;
+ }
+ params->gapless_mode = n;
+ break;
+ case OPT_INCLUDE_SBR_DELAY:
+ params->include_sbr_delay = 1;
+ break;
case 'I':
params->ignore_length = 1;
break;
case 'S':
params->silent = 1;
break;
+ case OPT_MOOV_BEFORE_MDAT:
+ params->moov_before_mdat = 1;
+ break;
case 'R':
params->is_raw = 1;
break;
case M4AF_TAG_TRACK:
case M4AF_TAG_DISK:
case M4AF_TAG_TEMPO:
- aacenc_param_add_itmf_entry(¶ms->tags, ch, 0, optarg,
- strlen(optarg), 0);
+ aacenc_add_tag_to_store(¶ms->tags, ch, 0, optarg,
+ strlen(optarg), 0);
break;
case OPT_SHORT_TAG:
case OPT_SHORT_TAG_FILE:
for (; *optarg; ++optarg)
fcc = ((fcc << 8) | (*optarg & 0xff));
}
- aacenc_param_add_itmf_entry(¶ms->tags, fcc, optarg,
- val, strlen(val),
- ch == OPT_SHORT_TAG_FILE);
+ aacenc_add_tag_to_store(¶ms->tags, fcc, optarg,
+ val, strlen(val),
+ ch == OPT_SHORT_TAG_FILE);
}
break;
case OPT_TAG_FROM_JSON:
}
+/*
+ * Pull PCM frames from `reader`, run them through `encoder` and hand the
+ * resulting access units to write_sample().
+ *
+ *   params        supplies output_fp and the `silent` flag (progress display)
+ *   reader        PCM source; read in chunks of `frame_length` frames
+ *   encoder       FDK encoder handle passed to aac_encode_frame()
+ *   frame_length  frames requested per read, also forwarded to write_sample()
+ *   m4af          forwarded to write_sample()
+ *
+ * Output is held back by one frame using two alternating buffers; nothing is
+ * written on the 1st and 3rd encoder outputs (see the comment in the loop).
+ * A set g_interrupted flag is handled like end of input so that the pending
+ * frames are still flushed.
+ *
+ * Returns the number of frames written, or -1 on failure (allocation, read,
+ * encode or write error).
+ */
 static
-int encode(wav_reader_t *wavf, HANDLE_AACENCODER encoder,
-           uint32_t frame_length, FILE *ofp, m4af_ctx_t *m4af,
-           int show_progress)
+int encode(aacenc_param_ex_t *params, pcm_reader_t *reader,
+           HANDLE_AACENCODER encoder, uint32_t frame_length,
+           m4af_ctx_t *m4af)
 {
-    uint8_t *ibuf = 0;
-    int16_t *pcmbuf = 0;
-    uint32_t pcmsize = 0;
-    uint8_t *obuf = 0;
-    uint32_t olen;
-    uint32_t osize = 0;
+    struct buffer_t {
+        uint8_t *data;
+        uint32_t len, size;
+    };
+    int16_t *ibuf = 0, *ip;
+    struct buffer_t obuf[2] = {{ 0 }}, *obp;
+    unsigned flip = 0;
     int nread = 1;
-    int consumed;
     int rc = -1;
-    int frames_written = 0;
+    int remaining, consumed;
+    int frames_written = 0, encoded = 0;
     aacenc_progress_t progress = { 0 };
-    const pcm_sample_description_t *format = wav_get_format(wavf);
+    const pcm_sample_description_t *fmt = pcm_get_format(reader);
+
+    ibuf = malloc(frame_length * fmt->bytes_per_frame);
+    /* Without this check a failed allocation would be passed to the reader. */
+    if (!ibuf) {
+        fprintf(stderr, "ERROR: out of memory\n");
+        goto END;
+    }
+    aacenc_progress_init(&progress, pcm_get_length(reader), fmt->sample_rate);
-    ibuf = malloc(frame_length * format->bytes_per_frame);
-    aacenc_progress_init(&progress, wav_get_length(wavf), format->sample_rate);
-    do {
+    for (;;) {
+        /*
+         * Since we delay the write, we cannot just exit loop when interrupted.
+         * Instead, we regard it as EOF.
+         */
         if (g_interrupted)
             nread = 0;
-        else if (nread) {
-            if ((nread = wav_read_frames(wavf, ibuf, frame_length)) < 0) {
+        if (nread > 0) {
+            if ((nread = pcm_read_frames(reader, ibuf, frame_length)) < 0) {
                 fprintf(stderr, "ERROR: read failed\n");
                 goto END;
-            } else if (nread > 0) {
-                if (pcm_convert_to_native_sint16(format, ibuf, nread,
-                                                 &pcmbuf, &pcmsize) < 0) {
-                    fprintf(stderr, "ERROR: unsupported sample format\n");
-                    goto END;
-                }
             }
-            if (show_progress)
-                aacenc_progress_update(&progress, wav_get_position(wavf),
-                                       format->sample_rate * 2);
+            if (!params->silent)
+                aacenc_progress_update(&progress, pcm_get_position(reader),
+                                       fmt->sample_rate * 2);
         }
-        if ((consumed = aac_encode_frame(encoder, format, pcmbuf, nread,
-                                         &obuf, &olen, &osize)) < 0)
-            goto END;
-        if (olen > 0) {
-            if (write_sample(ofp, m4af, obuf, olen, frame_length) < 0)
+        ip = ibuf;
+        remaining = nread;
+        do {
+            obp = &obuf[flip];
+            consumed = aac_encode_frame(encoder, fmt, ip, remaining,
+                                        &obp->data, &obp->len, &obp->size);
+            if (consumed < 0) goto END;
+            if (consumed == 0 && obp->len == 0) goto DONE;
+            if (obp->len == 0) break;
+
+            remaining -= consumed;
+            ip += consumed * fmt->channels_per_frame;
+            flip ^= 1;
+            /*
+             * As we pad 1 frame at beginning and ending by our extrapolator,
+             * we want to drop them.
+             * We delay output by 1 frame by double buffering, and discard
+             * second frame and final frame from the encoder.
+             * Since sbr_header is included in the first frame (in case of
+             * SBR), we cannot discard first frame. So we pick second instead.
+             */
+            ++encoded;
+            if (encoded == 1 || encoded == 3)
+                continue;
+            obp = &obuf[flip];
+            if (write_sample(params->output_fp, m4af, obp->data, obp->len,
+                             frame_length) < 0)
                 goto END;
             ++frames_written;
-        }
-    } while (nread > 0 || olen > 0);
-
-    if (show_progress)
-        aacenc_progress_finish(&progress, wav_get_position(wavf));
+        } while (remaining > 0);
+    }
+DONE:
+    if (!params->silent)
+        aacenc_progress_finish(&progress, pcm_get_position(reader));
     rc = frames_written;
 END:
     if (ibuf) free(ibuf);
-    if (pcmbuf) free(pcmbuf);
-    if (obuf) free(obuf);
+    if (obuf[0].data) free(obuf[0].data);
+    if (obuf[1].data) free(obuf[1].data);
     return rc;
 }
HANDLE_AACENCODER encoder)
{
unsigned i;
- aacenc_tag_entry_t *tag = params->tags.tag_table;
+ aacenc_tag_entry_t *tag;
+
+ tag = params->source_tags.tag_table;
+ for (i = 0; i < params->source_tags.tag_count; ++i, ++tag)
+ aacenc_write_tag_entry(m4af, tag);
if (params->json_filename)
- aacenc_put_tags_from_json(m4af, params->json_filename);
+ aacenc_write_tags_from_json(m4af, params->json_filename);
+ tag = params->tags.tag_table;
for (i = 0; i < params->tags.tag_count; ++i, ++tag)
- aacenc_put_tag_entry(m4af, tag);
+ aacenc_write_tag_entry(m4af, tag);
put_tool_tag(m4af, params, encoder);
- if (m4af_finalize(m4af) < 0) {
+ if (m4af_finalize(m4af, params->moov_before_mdat) < 0) {
fprintf(stderr, "ERROR: failed to finalize m4a\n");
return -1;
}
const char *ext_org = strrchr(base, '.');
if (ext_org) ilen = ext_org - base;
p = malloc(ilen + ext_len + 1);
- sprintf(p, "%.*s%s", ilen, base, ext);
+ sprintf(p, "%.*s%s", (int)ilen, base, ext);
}
return p;
}
return 0;
}
+/* Callback table used when the input is a regular file: all three set. */
+static pcm_io_vtbl_t pcm_io_vtbl = {
+    read_callback,
+    seek_callback,
+    tell_callback
+};
+/* Callback table for any other input: only the read callback is set. */
+static pcm_io_vtbl_t pcm_io_vtbl_noseek = {
+    read_callback,
+    0,
+    0
+};
+
+/*
+ * Open params->input_filename and build the PCM reader chain on top of it.
+ *
+ * The stream is stored in params->input_fp; it is not closed here, not even
+ * on failure, so the caller remains responsible for fclose().
+ * The seek/tell-capable callback table is chosen only when fstat() reports
+ * a regular file; otherwise the read-only table is used.
+ *
+ * With params->is_raw set, the sample layout is taken from raw_format,
+ * raw_rate and raw_channels.  Otherwise the first byte of the stream is
+ * peeked: 'R' selects wav_open(), 'c' selects caf_open() with the tag
+ * callback context pointed at params->source_tags; anything else is
+ * rejected.
+ *
+ * The resulting reader is wrapped by pcm_open_sint16_converter() and then
+ * extrapolater_open().
+ *
+ * Returns the reader, or 0 on any failure.
+ */
+static
+pcm_reader_t *open_input(aacenc_param_ex_t *params)
+{
+    pcm_io_context_t io = { 0 };
+    pcm_reader_t *reader = 0;
+    struct stat stb = { 0 };
+
+    if ((params->input_fp = aacenc_fopen(params->input_filename, "rb")) == 0) {
+        aacenc_fprintf(stderr, "ERROR: %s: %s\n", params->input_filename,
+                       strerror(errno));
+        goto END;
+    }
+    io.cookie = params->input_fp;
+    if (fstat(fileno(params->input_fp), &stb) == 0
+            && (stb.st_mode & S_IFMT) == S_IFREG)
+        io.vtbl = &pcm_io_vtbl;
+    else
+        io.vtbl = &pcm_io_vtbl_noseek;
+
+    if (params->is_raw) {
+        int bytes_per_channel;
+        pcm_sample_description_t desc = { 0 };
+        if (parse_raw_spec(params->raw_format, &desc) < 0) {
+            fprintf(stderr, "ERROR: invalid raw-format spec\n");
+            goto END;
+        }
+        desc.sample_rate = params->raw_rate;
+        desc.channels_per_frame = params->raw_channels;
+        /* Round the bit depth up to whole bytes per channel. */
+        bytes_per_channel = (desc.bits_per_channel + 7) / 8;
+        desc.bytes_per_frame = params->raw_channels * bytes_per_channel;
+        if ((reader = raw_open(&io, &desc)) == 0) {
+            fprintf(stderr, "ERROR: failed to open raw input\n");
+            goto END;
+        }
+    } else {
+        int c;
+        /* Peek at the first byte and push it back for the parser. */
+        ungetc(c = getc(params->input_fp), params->input_fp);
+
+        switch (c) {
+        case 'R':
+            if ((reader = wav_open(&io, params->ignore_length)) == 0) {
+                fprintf(stderr, "ERROR: broken / unsupported input file\n");
+                goto END;
+            }
+            break;
+        case 'c':
+            params->source_tag_ctx.add = aacenc_add_tag_entry_to_store;
+            params->source_tag_ctx.add_ctx = &params->source_tags;
+            if ((reader = caf_open(&io,
+                                   aacenc_translate_generic_text_tag,
+                                   &params->source_tag_ctx)) == 0) {
+                fprintf(stderr, "ERROR: broken / unsupported input file\n");
+                goto END;
+            }
+            break;
+        default:
+            fprintf(stderr, "ERROR: unsupported input file\n");
+            goto END;
+        }
+    }
+    if ((reader = pcm_open_sint16_converter(reader)) != 0)
+        reader = extrapolater_open(reader);
+    return reader;
+END:
+    return 0;
+}
+
int main(int argc, char **argv)
{
- wav_io_context_t wav_io = { read_callback, seek_callback, tell_callback };
- m4af_io_callbacks_t
- m4af_io = { 0, write_callback, seek_callback, tell_callback };
+ static m4af_io_callbacks_t m4af_io = {
+ read_callback, write_callback, seek_callback, tell_callback
+ };
aacenc_param_ex_t params = { 0 };
int result = 2;
- FILE *ifp = 0;
- FILE *ofp = 0;
char *output_filename = 0;
- wav_reader_t *wavf = 0;
+ pcm_reader_t *reader = 0;
HANDLE_AACENCODER encoder = 0;
AACENC_InfoStruct aacinfo = { 0 };
m4af_ctx_t *m4af = 0;
const pcm_sample_description_t *sample_format;
int downsampled_timescale = 0;
int frame_count = 0;
- struct stat stb = { 0 };
+ int sbr_mode = 0;
setlocale(LC_CTYPE, "");
setbuf(stderr, 0);
if (parse_options(argc, argv, ¶ms) < 0)
return 1;
- if ((ifp = aacenc_fopen(params.input_filename, "rb")) == 0) {
- aacenc_fprintf(stderr, "ERROR: %s: %s\n", params.input_filename,
- strerror(errno));
+ if ((reader = open_input(¶ms)) == 0)
goto END;
- }
- if (fstat(fileno(ifp), &stb) == 0 && (stb.st_mode & S_IFMT) != S_IFREG) {
- wav_io.seek = 0;
- wav_io.tell = 0;
- }
- if (!params.is_raw) {
- if ((wavf = wav_open(&wav_io, ifp, params.ignore_length)) == 0) {
- fprintf(stderr, "ERROR: broken / unsupported input file\n");
- goto END;
- }
- } else {
- int bytes_per_channel;
- pcm_sample_description_t desc = { 0 };
- if (parse_raw_spec(params.raw_format, &desc) < 0) {
- fprintf(stderr, "ERROR: invalid raw-format spec\n");
- goto END;
- }
- desc.sample_rate = params.raw_rate;
- desc.channels_per_frame = params.raw_channels;
- bytes_per_channel = (desc.bits_per_channel + 7) / 8;
- desc.bytes_per_frame = params.raw_channels * bytes_per_channel;
- if ((wavf = raw_open(&wav_io, ifp, &desc)) == 0) {
- fprintf(stderr, "ERROR: failed to open raw input\n");
- goto END;
- }
- }
- sample_format = wav_get_format(wavf);
+
+ sample_format = pcm_get_format(reader);
+
+ /*
+ * We use explicit/hierarchical signaling for LOAS.
+ * Other than that, we request implicit signaling to FDK library, then
+ * append explicit/backward-compatible signaling to ASC in case of MP4FF.
+ *
+ * Explicit/backward-compatible signaling of SBR is the most recommended
+ * way in MPEG4 part3 spec, and seems the only way supported by iTunes.
+ * Since FDK library does not support it, we have to do it on our side.
+ */
+ params.sbr_signaling = (params.transport_format == TT_MP4_LOAS) ? 2 : 0;
if (aacenc_init(&encoder, (aacenc_param_t*)¶ms, sample_format,
&aacinfo) < 0)
params.output_filename = output_filename;
}
- if ((ofp = aacenc_fopen(params.output_filename, "wb")) == 0) {
+ if ((params.output_fp = aacenc_fopen(params.output_filename, "wb+")) == 0) {
aacenc_fprintf(stderr, "ERROR: %s: %s\n", params.output_filename,
strerror(errno));
goto END;
}
handle_signals();
+ sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)¶ms);
if (!params.transport_format) {
uint32_t scale;
+ uint8_t mp4asc[32];
+ uint32_t ascsize = sizeof(mp4asc);
unsigned framelen = aacinfo.frameLength;
- int sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)¶ms);
- int sig_mode = aacEncoder_GetParam(encoder, AACENC_SIGNALING_MODE);
- if (sbr_mode && !sig_mode)
+ if (sbr_mode)
downsampled_timescale = 1;
scale = sample_format->sample_rate >> downsampled_timescale;
- if ((m4af = m4af_create(M4AF_CODEC_MP4A, scale, &m4af_io, ofp)) < 0)
+ if ((m4af = m4af_create(M4AF_CODEC_MP4A, scale, &m4af_io,
+ params.output_fp)) < 0)
goto END;
- m4af_set_decoder_specific_info(m4af, 0, aacinfo.confBuf,
- aacinfo.confSize);
+ aacenc_mp4asc((aacenc_param_t*)¶ms, aacinfo.confBuf,
+ aacinfo.confSize, mp4asc, &ascsize);
+ m4af_set_decoder_specific_info(m4af, 0, mp4asc, ascsize);
m4af_set_fixed_frame_duration(m4af, 0,
framelen >> downsampled_timescale);
+ m4af_set_vbr_mode(m4af, 0, params.bitrate_mode);
+ m4af_set_priming_mode(m4af, params.gapless_mode + 1);
m4af_begin_write(m4af);
}
- frame_count = encode(wavf, encoder, aacinfo.frameLength, ofp, m4af,
- !params.silent);
+ frame_count = encode(¶ms, reader, encoder, aacinfo.frameLength, m4af);
if (frame_count < 0)
goto END;
if (m4af) {
uint32_t delay = aacinfo.encoderDelay;
- int64_t frames_read = wav_get_position(wavf);
- uint32_t padding = frame_count * aacinfo.frameLength
- - frames_read - aacinfo.encoderDelay;
+ uint32_t padding;
+ int64_t frames_read = pcm_get_position(reader);
+
+ if (sbr_mode && params.profile != AOT_ER_AAC_ELD &&
+ !params.include_sbr_delay)
+ delay -= 481 << 1;
+ if (sbr_mode && (delay & 1))
+ ++delay;
+ padding = frame_count * aacinfo.frameLength - frames_read - delay;
m4af_set_priming(m4af, 0, delay >> downsampled_timescale,
padding >> downsampled_timescale);
if (finalize_m4a(m4af, ¶ms, encoder) < 0)
}
result = 0;
END:
- if (wavf) wav_teardown(&wavf);
- if (ifp) fclose(ifp);
+ if (reader) pcm_teardown(&reader);
+ if (params.input_fp) fclose(params.input_fp);
if (m4af) m4af_teardown(&m4af);
- if (ofp) fclose(ofp);
+ if (params.output_fp) fclose(params.output_fp);
if (encoder) aacEncClose(&encoder);
if (output_filename) free(output_filename);
- if (params.tags.tag_table) free(params.tags.tag_table);
+ if (params.tags.tag_table)
+ aacenc_free_tag_store(¶ms.tags);
+ if (params.source_tags.tag_table)
+ aacenc_free_tag_store(¶ms.source_tags);
return result;
}