From 1af8624b009faf5bc6b9b8dfa76676483da5f6cf Mon Sep 17 00:00:00 2001 From: nu774 Date: Thu, 24 Oct 2013 20:09:17 +0900 Subject: [PATCH] caf input support --- MSVC/fdkaac.vcxproj | 1 + MSVC/fdkaac.vcxproj.filters | 15 +++ Makefile.am | 1 + src/caf_reader.c | 253 ++++++++++++++++++++++++++++++++++++ src/caf_reader.h | 15 +++ src/catypes.h | 241 ++++++++++++++++++++++++++++++++++ src/main.c | 38 +++++- src/metadata.c | 3 +- src/pcm_reader.h | 20 ++- src/pcm_readhelper.c | 202 ++++++++++++++++++++++++++++ src/wav_reader.c | 9 +- src/wav_reader.h | 2 - 12 files changed, 785 insertions(+), 15 deletions(-) create mode 100644 src/caf_reader.c create mode 100644 src/caf_reader.h create mode 100644 src/catypes.h diff --git a/MSVC/fdkaac.vcxproj b/MSVC/fdkaac.vcxproj index 99840f0..a07b91e 100644 --- a/MSVC/fdkaac.vcxproj +++ b/MSVC/fdkaac.vcxproj @@ -96,6 +96,7 @@ copy ..\fdk-aac\libSYS\include\machine_type.h include\fdk-aac\ + diff --git a/MSVC/fdkaac.vcxproj.filters b/MSVC/fdkaac.vcxproj.filters index 7ce283c..feae3ab 100644 --- a/MSVC/fdkaac.vcxproj.filters +++ b/MSVC/fdkaac.vcxproj.filters @@ -18,6 +18,9 @@ Source Files + + Source Files + Source Files @@ -30,6 +33,12 @@ Source Files + + Source Files + + + Source Files + Source Files @@ -44,6 +53,12 @@ Header Files + + Header Files + + + Header Files + Header Files diff --git a/Makefile.am b/Makefile.am index afe85ed..94a1c7c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,7 @@ bin_PROGRAMS = fdkaac fdkaac_SOURCES = \ src/aacenc.c \ + src/caf_reader.c \ src/lpcm.c \ src/m4af.c \ src/main.c \ diff --git a/src/caf_reader.c b/src/caf_reader.c new file mode 100644 index 0000000..ede919a --- /dev/null +++ b/src/caf_reader.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2013 nu774 + * For conditions of distribution and use, see copyright notice in COPYING + */ +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#if HAVE_STDINT_H +# include +#endif + +#include +#include +#include +#include +#include "caf_reader.h" +#include 
"m4af.h" + +typedef struct caf_reader_t { + pcm_reader_vtbl_t *vtbl; + pcm_sample_description_t sample_format; + int64_t length; + int64_t position; + int64_t data_offset; + pcm_io_context_t io; + aacenc_tag_callback_t tag_callback; + void *tag_ctx; + uint8_t chanmap[8]; +} caf_reader_t; + +static const pcm_sample_description_t *caf_get_format(pcm_reader_t *reader) +{ + return &((caf_reader_t *)reader)->sample_format; +} + +static int64_t caf_get_length(pcm_reader_t *reader) +{ + return ((caf_reader_t *)reader)->length; +} + +static int64_t caf_get_position(pcm_reader_t *reader) +{ + return ((caf_reader_t *)reader)->position; +} + +static void caf_teardown(pcm_reader_t **reader) +{ + free(*reader); + *reader = 0; +} + +static +uint32_t caf_next_chunk(caf_reader_t *reader, int64_t *chunk_size) +{ + uint32_t fcc; + if (pcm_scanb(&reader->io, "LQ", &fcc, chunk_size) == 2) + return fcc; + return 0; +} + +static +int caf_desc(caf_reader_t *reader, int64_t chunk_size) +{ + double mSampleRate; + uint32_t mFormatID, mFormatFlags, mBytesPerPacket, mFramesPerPacket, + mChannelsPerFrame, mBitsPerChannel; + pcm_sample_description_t *desc = &reader->sample_format; + + ENSURE(chunk_size >= 32); + TRY_IO(pcm_scanb(&reader->io, "QLLLLLL", &mSampleRate, &mFormatID, + &mFormatFlags, &mBytesPerPacket, &mFramesPerPacket, + &mChannelsPerFrame, &mBitsPerChannel) != 7); + + ENSURE(mFormatID == M4AF_FOURCC('l','p','c','m')); + ENSURE(mSampleRate && mBytesPerPacket && + mChannelsPerFrame >= 1 && mChannelsPerFrame <= 8 && + mBitsPerChannel && mFramesPerPacket == 1 && + mBytesPerPacket % mChannelsPerFrame == 0 && + mBytesPerPacket >= mChannelsPerFrame * ((mBitsPerChannel + 7) / 8)); + + desc->sample_rate = mSampleRate; + desc->bits_per_channel = mBitsPerChannel; + desc->bytes_per_frame = mBytesPerPacket; + desc->channels_per_frame = mChannelsPerFrame; + + switch (mFormatFlags) { + case 0: desc->sample_type = PCM_TYPE_SINT_BE; break; + case 1: desc->sample_type = PCM_TYPE_FLOAT_BE; break; + 
case 2: desc->sample_type = PCM_TYPE_SINT; break; + case 3: desc->sample_type = PCM_TYPE_FLOAT; break; + default: goto FAIL; + } + + TRY_IO(pcm_skip(&reader->io, chunk_size - 32)); + return 0; +FAIL: + return -1; +} + +static +int caf_info(caf_reader_t *reader, int64_t chunk_size) +{ + char *buf, *key, *val, *end; + size_t len; + + if (chunk_size < 4 || (buf = malloc(chunk_size)) == 0) + return -1; + pcm_read(&reader->io, buf, chunk_size); + key = buf + 4; + end = buf + chunk_size; + do { + if ((val = key + strlen(key) + 1) < end) { + len = strlen(val); + if (reader->tag_callback) + reader->tag_callback(reader->tag_ctx, key, val, len); + key = val + len + 1; + } + } while (key < end && val < end); + + if (reader->tag_callback) + reader->tag_callback(reader->tag_ctx, 0, 0, 0); + free(buf); + return 0; +} + +static +int caf_read_frames(pcm_reader_t *preader, void *buffer, unsigned nframes) +{ + int rc; + unsigned i, j, nbytes; + caf_reader_t *reader = (caf_reader_t *)preader; + unsigned bpf = reader->sample_format.bytes_per_frame; + unsigned nchannels = reader->sample_format.channels_per_frame; + unsigned bpc = bpf / nchannels; + uint8_t tmp[64]; /* enough room for maximum bpf: 8ch float64 */ + uint8_t *bp; + uint8_t *chanmap = reader->chanmap; + + if (nframes > reader->length - reader->position) + nframes = reader->length - reader->position; + nbytes = nframes * bpf; + if (nbytes) { + if ((rc = pcm_read(&reader->io, buffer, nbytes)) < 0) + return -1; + nframes = rc / bpf; + for (bp = buffer, i = 0; i < nframes; ++i, bp += bpf) { + memcpy(tmp, bp, bpf); + for (j = 0; j < nchannels; ++j) + memcpy(bp + bpc * j, tmp + bpc * chanmap[j], bpc); + } + reader->position += nframes; + } + if (nframes == 0) { + /* fetch info after data chunk */ + uint32_t fcc; + int64_t chunk_size; + while ((fcc = caf_next_chunk(reader, &chunk_size)) != 0) { + if (fcc == M4AF_FOURCC('i','n','f','o')) + TRY_IO(caf_info(reader, chunk_size)); + else + TRY_IO(pcm_skip(&reader->io, chunk_size)); + } 
+ } + return nframes; +FAIL: + return 0; +} + +static +int caf_parse(caf_reader_t *reader, int64_t *data_length) +{ + uint32_t fcc; + int64_t chunk_size; + + *data_length = 0; + + /* CAFFileHeader */ + TRY_IO(pcm_read32be(&reader->io, &fcc)); + ENSURE(fcc == M4AF_FOURCC('c','a','f','f')); + TRY_IO(pcm_skip(&reader->io, 4)); /* mFileVersion, mFileFlags */ + + while ((fcc = caf_next_chunk(reader, &chunk_size)) != 0) { + if (fcc == M4AF_FOURCC('d','e','s','c')) + TRY_IO(caf_desc(reader, chunk_size)); + else if (fcc == M4AF_FOURCC('i','n','f','o')) + TRY_IO(caf_info(reader, chunk_size)); + else if (fcc == M4AF_FOURCC('c','h','a','n')) { + ENSURE(reader->sample_format.channels_per_frame); + if (apple_chan_chunk(&reader->io, chunk_size, + &reader->sample_format, reader->chanmap) < 0) + goto FAIL; + } else if (fcc == M4AF_FOURCC('d','a','t','a')) { + TRY_IO(pcm_skip(&reader->io, 4)); /* mEditCount */ + *data_length = (chunk_size == ~0ULL) ? chunk_size : chunk_size - 4; + reader->data_offset += 12; + break; + } else + TRY_IO(pcm_skip(&reader->io, chunk_size)); + + reader->data_offset += (chunk_size + 8); + } + ENSURE(reader->sample_format.channels_per_frame); + ENSURE(fcc == M4AF_FOURCC('d','a','t','a')); + return 0; +FAIL: + return -1; +} + +static pcm_reader_vtbl_t caf_vtable = { + caf_get_format, + caf_get_length, + caf_get_position, + caf_read_frames, + caf_teardown +}; + +pcm_reader_t *caf_open(pcm_io_context_t *io, + aacenc_tag_callback_t tag_callback, void *tag_ctx) +{ + caf_reader_t *reader = 0; + int64_t data_length; + unsigned bpf; + + if ((reader = calloc(1, sizeof(caf_reader_t))) == 0) + return 0; + memcpy(&reader->io, io, sizeof(pcm_io_context_t)); + reader->tag_callback = tag_callback; + reader->tag_ctx = tag_ctx; + + if (caf_parse(reader, &data_length) < 0) { + free(reader); + return 0; + } + bpf = reader->sample_format.bytes_per_frame; + + /* CAF uses -1 to indicate "unknown size" */ + if (data_length < 0 || data_length % bpf) + reader->length = INT64_MAX; 
+ else + reader->length = data_length / bpf; + + if (reader->length == INT64_MAX) { + if (pcm_seek(&reader->io, 0, SEEK_END) >= 0) { + int64_t size = pcm_tell(&reader->io); + if (size > 0) + reader->length = (size - reader->data_offset) / bpf; + pcm_seek(&reader->io, reader->data_offset, SEEK_SET); + } + } + reader->vtbl = &caf_vtable; + return (pcm_reader_t *)reader; +} diff --git a/src/caf_reader.h b/src/caf_reader.h new file mode 100644 index 0000000..d341531 --- /dev/null +++ b/src/caf_reader.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2013 nu774 + * For conditions of distribution and use, see copyright notice in COPYING + */ +#ifndef CAF_READER_H +#define CAF_READER_H + +#include "lpcm.h" +#include "pcm_reader.h" +#include "metadata.h" + +pcm_reader_t *caf_open(pcm_io_context_t *io, + aacenc_tag_callback_t tag_callback, void *tag_ctx); + +#endif diff --git a/src/catypes.h b/src/catypes.h new file mode 100644 index 0000000..cc0fdc9 --- /dev/null +++ b/src/catypes.h @@ -0,0 +1,241 @@ +#if !defined(__CoreAudioTypes_h__) +#define __CoreAudioTypes_h__ + +enum { kVariableLengthArray = 1 }; + +typedef uint32_t AudioChannelLabel; +typedef uint32_t AudioChannelLayoutTag; + +struct AudioChannelDescription +{ + AudioChannelLabel mChannelLabel; + uint32_t mChannelFlags; + float mCoordinates[3]; +}; +typedef struct AudioChannelDescription AudioChannelDescription; + +struct AudioChannelLayout +{ + AudioChannelLayoutTag mChannelLayoutTag; + uint32_t mChannelBitmap; + uint32_t mNumberChannelDescriptions; + AudioChannelDescription mChannelDescriptions[kVariableLengthArray]; +}; +typedef struct AudioChannelLayout AudioChannelLayout; + +enum +{ + kAudioChannelLabel_Unknown = 0xFFFFFFFF, // unknown or unspecified other use + kAudioChannelLabel_Unused = 0, // channel is present, but has no intended use or destination + kAudioChannelLabel_UseCoordinates = 100, // channel is described by the mCoordinates fields. 
+ + kAudioChannelLabel_Left = 1, + kAudioChannelLabel_Right = 2, + kAudioChannelLabel_Center = 3, + kAudioChannelLabel_LFEScreen = 4, + kAudioChannelLabel_LeftSurround = 5, // WAVE: "Back Left" + kAudioChannelLabel_RightSurround = 6, // WAVE: "Back Right" + kAudioChannelLabel_LeftCenter = 7, + kAudioChannelLabel_RightCenter = 8, + kAudioChannelLabel_CenterSurround = 9, // WAVE: "Back Center" or plain "Rear Surround" + kAudioChannelLabel_LeftSurroundDirect = 10, // WAVE: "Side Left" + kAudioChannelLabel_RightSurroundDirect = 11, // WAVE: "Side Right" + kAudioChannelLabel_TopCenterSurround = 12, + kAudioChannelLabel_VerticalHeightLeft = 13, // WAVE: "Top Front Left" + kAudioChannelLabel_VerticalHeightCenter = 14, // WAVE: "Top Front Center" + kAudioChannelLabel_VerticalHeightRight = 15, // WAVE: "Top Front Right" + + kAudioChannelLabel_TopBackLeft = 16, + kAudioChannelLabel_TopBackCenter = 17, + kAudioChannelLabel_TopBackRight = 18, + + kAudioChannelLabel_RearSurroundLeft = 33, + kAudioChannelLabel_RearSurroundRight = 34, + kAudioChannelLabel_LeftWide = 35, + kAudioChannelLabel_RightWide = 36, + kAudioChannelLabel_LFE2 = 37, + kAudioChannelLabel_LeftTotal = 38, // matrix encoded 4 channels + kAudioChannelLabel_RightTotal = 39, // matrix encoded 4 channels + kAudioChannelLabel_HearingImpaired = 40, + kAudioChannelLabel_Narration = 41, + kAudioChannelLabel_Mono = 42, + kAudioChannelLabel_DialogCentricMix = 43, + + kAudioChannelLabel_CenterSurroundDirect = 44, // back center, non diffuse + + kAudioChannelLabel_Haptic = 45, + + // first order ambisonic channels + kAudioChannelLabel_Ambisonic_W = 200, + kAudioChannelLabel_Ambisonic_X = 201, + kAudioChannelLabel_Ambisonic_Y = 202, + kAudioChannelLabel_Ambisonic_Z = 203, + + // Mid/Side Recording + kAudioChannelLabel_MS_Mid = 204, + kAudioChannelLabel_MS_Side = 205, + + // X-Y Recording + kAudioChannelLabel_XY_X = 206, + kAudioChannelLabel_XY_Y = 207, + + // other + kAudioChannelLabel_HeadphonesLeft = 301, + 
kAudioChannelLabel_HeadphonesRight = 302, + kAudioChannelLabel_ClickTrack = 304, + kAudioChannelLabel_ForeignLanguage = 305, + + // generic discrete channel + kAudioChannelLabel_Discrete = 400, + + // numbered discrete channel + kAudioChannelLabel_Discrete_0 = (1L<<16) | 0, + kAudioChannelLabel_Discrete_1 = (1L<<16) | 1, + kAudioChannelLabel_Discrete_2 = (1L<<16) | 2, + kAudioChannelLabel_Discrete_3 = (1L<<16) | 3, + kAudioChannelLabel_Discrete_4 = (1L<<16) | 4, + kAudioChannelLabel_Discrete_5 = (1L<<16) | 5, + kAudioChannelLabel_Discrete_6 = (1L<<16) | 6, + kAudioChannelLabel_Discrete_7 = (1L<<16) | 7, + kAudioChannelLabel_Discrete_8 = (1L<<16) | 8, + kAudioChannelLabel_Discrete_9 = (1L<<16) | 9, + kAudioChannelLabel_Discrete_10 = (1L<<16) | 10, + kAudioChannelLabel_Discrete_11 = (1L<<16) | 11, + kAudioChannelLabel_Discrete_12 = (1L<<16) | 12, + kAudioChannelLabel_Discrete_13 = (1L<<16) | 13, + kAudioChannelLabel_Discrete_14 = (1L<<16) | 14, + kAudioChannelLabel_Discrete_15 = (1L<<16) | 15, + kAudioChannelLabel_Discrete_65535 = (1L<<16) | 65535 +}; + +#define AudioChannelLayoutTag_GetNumberOfChannels(layoutTag) \ + ((uint32_t)((layoutTag) & 0x0000FFFF)) + +enum +{ + kAudioChannelLayoutTag_UseChannelDescriptions = (0L<<16) | 0, // use the array of AudioChannelDescriptions to define the mapping. + kAudioChannelLayoutTag_UseChannelBitmap = (1L<<16) | 0, // use the bitmap to define the mapping. 
+ + kAudioChannelLayoutTag_Mono = (100L<<16) | 1, // a standard mono stream + kAudioChannelLayoutTag_Stereo = (101L<<16) | 2, // a standard stereo stream (L R) - implied playback + kAudioChannelLayoutTag_StereoHeadphones = (102L<<16) | 2, // a standard stereo stream (L R) - implied headphone playbac + kAudioChannelLayoutTag_MatrixStereo = (103L<<16) | 2, // a matrix encoded stereo stream (Lt, Rt) + kAudioChannelLayoutTag_MidSide = (104L<<16) | 2, // mid/side recording + kAudioChannelLayoutTag_XY = (105L<<16) | 2, // coincident mic pair (often 2 figure 8's) + kAudioChannelLayoutTag_Binaural = (106L<<16) | 2, // binaural stereo (left, right) + kAudioChannelLayoutTag_Ambisonic_B_Format = (107L<<16) | 4, // W, X, Y, Z + + kAudioChannelLayoutTag_Quadraphonic = (108L<<16) | 4, // front left, front right, back left, back right + + kAudioChannelLayoutTag_Pentagonal = (109L<<16) | 5, // left, right, rear left, rear right, center + + kAudioChannelLayoutTag_Hexagonal = (110L<<16) | 6, // left, right, rear left, rear right, center, rear + + kAudioChannelLayoutTag_Octagonal = (111L<<16) | 8, // front left, front right, rear left, rear right, + // front center, rear center, side left, side right + + kAudioChannelLayoutTag_Cube = (112L<<16) | 8, // left, right, rear left, rear right + // top left, top right, top rear left, top rear right + + // MPEG defined layouts + kAudioChannelLayoutTag_MPEG_1_0 = kAudioChannelLayoutTag_Mono, // C + kAudioChannelLayoutTag_MPEG_2_0 = kAudioChannelLayoutTag_Stereo, // L R + kAudioChannelLayoutTag_MPEG_3_0_A = (113L<<16) | 3, // L R C + kAudioChannelLayoutTag_MPEG_3_0_B = (114L<<16) | 3, // C L R + kAudioChannelLayoutTag_MPEG_4_0_A = (115L<<16) | 4, // L R C Cs + kAudioChannelLayoutTag_MPEG_4_0_B = (116L<<16) | 4, // C L R Cs + kAudioChannelLayoutTag_MPEG_5_0_A = (117L<<16) | 5, // L R C Ls Rs + kAudioChannelLayoutTag_MPEG_5_0_B = (118L<<16) | 5, // L R Ls Rs C + kAudioChannelLayoutTag_MPEG_5_0_C = (119L<<16) | 5, // L C R Ls Rs + 
kAudioChannelLayoutTag_MPEG_5_0_D = (120L<<16) | 5, // C L R Ls Rs + kAudioChannelLayoutTag_MPEG_5_1_A = (121L<<16) | 6, // L R C LFE Ls Rs + kAudioChannelLayoutTag_MPEG_5_1_B = (122L<<16) | 6, // L R Ls Rs C LFE + kAudioChannelLayoutTag_MPEG_5_1_C = (123L<<16) | 6, // L C R Ls Rs LFE + kAudioChannelLayoutTag_MPEG_5_1_D = (124L<<16) | 6, // C L R Ls Rs LFE + kAudioChannelLayoutTag_MPEG_6_1_A = (125L<<16) | 7, // L R C LFE Ls Rs Cs + kAudioChannelLayoutTag_MPEG_7_1_A = (126L<<16) | 8, // L R C LFE Ls Rs Lc Rc + kAudioChannelLayoutTag_MPEG_7_1_B = (127L<<16) | 8, // C Lc Rc L R Ls Rs LFE (doc: IS-13818-7 MPEG2-AAC Table 3.1) + kAudioChannelLayoutTag_MPEG_7_1_C = (128L<<16) | 8, // L R C LFE Ls Rs Rls Rrs + kAudioChannelLayoutTag_Emagic_Default_7_1 = (129L<<16) | 8, // L R Ls Rs C LFE Lc Rc + kAudioChannelLayoutTag_SMPTE_DTV = (130L<<16) | 8, // L R C LFE Ls Rs Lt Rt + // (kAudioChannelLayoutTag_ITU_5_1 plus a matrix encoded stereo mix) + + // ITU defined layouts + kAudioChannelLayoutTag_ITU_1_0 = kAudioChannelLayoutTag_Mono, // C + kAudioChannelLayoutTag_ITU_2_0 = kAudioChannelLayoutTag_Stereo, // L R + + kAudioChannelLayoutTag_ITU_2_1 = (131L<<16) | 3, // L R Cs + kAudioChannelLayoutTag_ITU_2_2 = (132L<<16) | 4, // L R Ls Rs + kAudioChannelLayoutTag_ITU_3_0 = kAudioChannelLayoutTag_MPEG_3_0_A, // L R C + kAudioChannelLayoutTag_ITU_3_1 = kAudioChannelLayoutTag_MPEG_4_0_A, // L R C Cs + + kAudioChannelLayoutTag_ITU_3_2 = kAudioChannelLayoutTag_MPEG_5_0_A, // L R C Ls Rs + kAudioChannelLayoutTag_ITU_3_2_1 = kAudioChannelLayoutTag_MPEG_5_1_A, // L R C LFE Ls Rs + kAudioChannelLayoutTag_ITU_3_4_1 = kAudioChannelLayoutTag_MPEG_7_1_C, // L R C LFE Ls Rs Rls Rrs + + // DVD defined layouts + kAudioChannelLayoutTag_DVD_0 = kAudioChannelLayoutTag_Mono, // C (mono) + kAudioChannelLayoutTag_DVD_1 = kAudioChannelLayoutTag_Stereo, // L R + kAudioChannelLayoutTag_DVD_2 = kAudioChannelLayoutTag_ITU_2_1, // L R Cs + kAudioChannelLayoutTag_DVD_3 = kAudioChannelLayoutTag_ITU_2_2, // L 
R Ls Rs + kAudioChannelLayoutTag_DVD_4 = (133L<<16) | 3, // L R LFE + kAudioChannelLayoutTag_DVD_5 = (134L<<16) | 4, // L R LFE Cs + kAudioChannelLayoutTag_DVD_6 = (135L<<16) | 5, // L R LFE Ls Rs + kAudioChannelLayoutTag_DVD_7 = kAudioChannelLayoutTag_MPEG_3_0_A, // L R C + kAudioChannelLayoutTag_DVD_8 = kAudioChannelLayoutTag_MPEG_4_0_A, // L R C Cs + kAudioChannelLayoutTag_DVD_9 = kAudioChannelLayoutTag_MPEG_5_0_A, // L R C Ls Rs + kAudioChannelLayoutTag_DVD_10 = (136L<<16) | 4, // L R C LFE + kAudioChannelLayoutTag_DVD_11 = (137L<<16) | 5, // L R C LFE Cs + kAudioChannelLayoutTag_DVD_12 = kAudioChannelLayoutTag_MPEG_5_1_A, // L R C LFE Ls Rs + // 13 through 17 are duplicates of 8 through 12. + kAudioChannelLayoutTag_DVD_13 = kAudioChannelLayoutTag_DVD_8, // L R C Cs + kAudioChannelLayoutTag_DVD_14 = kAudioChannelLayoutTag_DVD_9, // L R C Ls Rs + kAudioChannelLayoutTag_DVD_15 = kAudioChannelLayoutTag_DVD_10, // L R C LFE + kAudioChannelLayoutTag_DVD_16 = kAudioChannelLayoutTag_DVD_11, // L R C LFE Cs + kAudioChannelLayoutTag_DVD_17 = kAudioChannelLayoutTag_DVD_12, // L R C LFE Ls Rs + kAudioChannelLayoutTag_DVD_18 = (138L<<16) | 5, // L R Ls Rs LFE + kAudioChannelLayoutTag_DVD_19 = kAudioChannelLayoutTag_MPEG_5_0_B, // L R Ls Rs C + kAudioChannelLayoutTag_DVD_20 = kAudioChannelLayoutTag_MPEG_5_1_B, // L R Ls Rs C LFE + + // These layouts are recommended for AudioUnit usage + // These are the symmetrical layouts + kAudioChannelLayoutTag_AudioUnit_4 = kAudioChannelLayoutTag_Quadraphonic, + kAudioChannelLayoutTag_AudioUnit_5 = kAudioChannelLayoutTag_Pentagonal, + kAudioChannelLayoutTag_AudioUnit_6 = kAudioChannelLayoutTag_Hexagonal, + kAudioChannelLayoutTag_AudioUnit_8 = kAudioChannelLayoutTag_Octagonal, + // These are the surround-based layouts + kAudioChannelLayoutTag_AudioUnit_5_0 = kAudioChannelLayoutTag_MPEG_5_0_B, // L R Ls Rs C + kAudioChannelLayoutTag_AudioUnit_6_0 = (139L<<16) | 6, // L R Ls Rs C Cs + kAudioChannelLayoutTag_AudioUnit_7_0 = (140L<<16) | 7, 
// L R Ls Rs C Rls Rrs + kAudioChannelLayoutTag_AudioUnit_7_0_Front = (148L<<16) | 7, // L R Ls Rs C Lc Rc + kAudioChannelLayoutTag_AudioUnit_5_1 = kAudioChannelLayoutTag_MPEG_5_1_A, // L R C LFE Ls Rs + kAudioChannelLayoutTag_AudioUnit_6_1 = kAudioChannelLayoutTag_MPEG_6_1_A, // L R C LFE Ls Rs Cs + kAudioChannelLayoutTag_AudioUnit_7_1 = kAudioChannelLayoutTag_MPEG_7_1_C, // L R C LFE Ls Rs Rls Rrs + kAudioChannelLayoutTag_AudioUnit_7_1_Front = kAudioChannelLayoutTag_MPEG_7_1_A, // L R C LFE Ls Rs Lc Rc + + kAudioChannelLayoutTag_AAC_3_0 = kAudioChannelLayoutTag_MPEG_3_0_B, // C L R + kAudioChannelLayoutTag_AAC_Quadraphonic = kAudioChannelLayoutTag_Quadraphonic, // L R Ls Rs + kAudioChannelLayoutTag_AAC_4_0 = kAudioChannelLayoutTag_MPEG_4_0_B, // C L R Cs + kAudioChannelLayoutTag_AAC_5_0 = kAudioChannelLayoutTag_MPEG_5_0_D, // C L R Ls Rs + kAudioChannelLayoutTag_AAC_5_1 = kAudioChannelLayoutTag_MPEG_5_1_D, // C L R Ls Rs Lfe + kAudioChannelLayoutTag_AAC_6_0 = (141L<<16) | 6, // C L R Ls Rs Cs + kAudioChannelLayoutTag_AAC_6_1 = (142L<<16) | 7, // C L R Ls Rs Cs Lfe + kAudioChannelLayoutTag_AAC_7_0 = (143L<<16) | 7, // C L R Ls Rs Rls Rrs + kAudioChannelLayoutTag_AAC_7_1 = kAudioChannelLayoutTag_MPEG_7_1_B, // C Lc Rc L R Ls Rs Lfe + kAudioChannelLayoutTag_AAC_Octagonal = (144L<<16) | 8, // C L R Ls Rs Rls Rrs Cs + + kAudioChannelLayoutTag_TMH_10_2_std = (145L<<16) | 16, // L R C Vhc Lsd Rsd Ls Rs Vhl Vhr Lw Rw Csd Cs LFE1 LFE2 + kAudioChannelLayoutTag_TMH_10_2_full = (146L<<16) | 21, // TMH_10_2_std plus: Lc Rc HI VI Haptic + + kAudioChannelLayoutTag_AC3_1_0_1 = (149L<<16) | 2, // C LFE + kAudioChannelLayoutTag_AC3_3_0 = (150L<<16) | 3, // L C R + kAudioChannelLayoutTag_AC3_3_1 = (151L<<16) | 4, // L C R Cs + kAudioChannelLayoutTag_AC3_3_0_1 = (152L<<16) | 4, // L C R LFE + kAudioChannelLayoutTag_AC3_2_1_1 = (153L<<16) | 4, // L R Cs LFE + kAudioChannelLayoutTag_AC3_3_1_1 = (154L<<16) | 5, // L C R Cs LFE + + kAudioChannelLayoutTag_DiscreteInOrder = (147L<<16) | 
0, // needs to be ORed with the actual number of channels + kAudioChannelLayoutTag_Unknown = 0xFFFF0000 // needs to be ORed with the actual number of channels +}; + +#endif diff --git a/src/main.c b/src/main.c index a178070..d8ee02d 100644 --- a/src/main.c +++ b/src/main.c @@ -34,6 +34,7 @@ #endif #include "compat.h" #include "wav_reader.h" +#include "caf_reader.h" #include "aacenc.h" #include "m4af.h" #include "progress.h" @@ -217,6 +218,8 @@ typedef struct aacenc_param_ex_t { const char *raw_format; aacenc_tag_store_t tags; + aacenc_tag_store_t source_tags; + aacenc_translate_generic_text_tag_ctx_t source_tag_ctx; char *json_filename; } aacenc_param_ex_t; @@ -570,11 +573,16 @@ int finalize_m4a(m4af_ctx_t *m4af, const aacenc_param_ex_t *params, HANDLE_AACENCODER encoder) { unsigned i; - aacenc_tag_entry_t *tag = params->tags.tag_table; + aacenc_tag_entry_t *tag; + + tag = params->source_tags.tag_table; + for (i = 0; i < params->source_tags.tag_count; ++i, ++tag) + aacenc_write_tag_entry(m4af, tag); if (params->json_filename) aacenc_write_tags_from_json(m4af, params->json_filename); + tag = params->tags.tag_table; for (i = 0; i < params->tags.tag_count; ++i, ++tag) aacenc_write_tag_entry(m4af, tag); @@ -683,8 +691,27 @@ pcm_reader_t *open_input(aacenc_param_ex_t *params) goto END; } } else { - if ((reader = wav_open(&io, params->ignore_length)) == 0) { - fprintf(stderr, "ERROR: broken / unsupported input file\n"); + int c; + ungetc(c = getc(params->input_fp), params->input_fp); + + switch (c) { + case 'R': + if ((reader = wav_open(&io, params->ignore_length)) == 0) { + fprintf(stderr, "ERROR: broken / unsupported input file\n"); + goto END; + } + break; + case 'c': + params->source_tag_ctx.add = aacenc_add_tag_entry_to_store; + params->source_tag_ctx.add_ctx = ¶ms->source_tags; + if ((reader = caf_open(&io, + aacenc_translate_generic_text_tag, + ¶ms->source_tag_ctx)) == 0) { + fprintf(stderr, "ERROR: broken / unsupported input file\n"); + goto END; + } + break; + 
/*
 * Count the bits set in a 32-bit word.
 *
 * Adjacent groups of 1, 2, 4, 8 and 16 bits are summed pairwise in five
 * rounds, so the final value is the total number of set bits (0..32).
 */
static inline
uint32_t bitcount(uint32_t bits)
{
    static const uint32_t lane_mask[5] = {
        0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
    };
    unsigned round;

    for (round = 0; round < 5; ++round) {
        /* lanes are 1 << round bits wide in this round */
        uint32_t low  = bits & lane_mask[round];
        uint32_t high = (bits >> (1u << round)) & lane_mask[round];
        bits = low + high;
    }
    return bits;
}
pcm_read64be(pcm_io_context_t *io, uint64_t *value); int pcm_scanl(pcm_io_context_t *io, const char *fmt, ...); int pcm_scanb(pcm_io_context_t *io, const char *fmt, ...); +int apple_chan_chunk(pcm_io_context_t *io, uint32_t chunk_size, + pcm_sample_description_t *fmt, uint8_t *mapping); + +pcm_reader_t *pcm_open_sint16_converter(pcm_reader_t *reader); + #endif diff --git a/src/pcm_readhelper.c b/src/pcm_readhelper.c index 70b47f5..5a2b037 100644 --- a/src/pcm_readhelper.c +++ b/src/pcm_readhelper.c @@ -12,9 +12,12 @@ #endif #include +#include +#include #include #include "pcm_reader.h" #include "m4af_endian.h" +#include "catypes.h" int pcm_read(pcm_io_context_t *io, void *buffer, uint32_t size) { @@ -152,3 +155,202 @@ FAIL: va_end(ap); return count; } + +static +int channel_compare(const void *a, const void *b) +{ + return (*(const uint8_t **)a)[0] - (*(const uint8_t **)b)[0]; +} + +void apple_translate_channel_labels(uint8_t *channels, unsigned n) +{ + unsigned i; + char *has_side = strpbrk((char*)channels, "\x0A\x0B"); + + for (i = 0; i < n; ++i) { + switch (channels[i]) { + case kAudioChannelLabel_LeftSurround: + case kAudioChannelLabel_RightSurround: + if (!has_side) channels[i] += 5; // map to SL/SR + break; + case kAudioChannelLabel_RearSurroundLeft: + case kAudioChannelLabel_RearSurroundRight: + if (!has_side) channels[i] -= 28; // map to BL/BR + break; + case kAudioChannelLabel_Mono: + channels[i] = kAudioChannelLabel_Center; + break; + } + } +} + +int apple_chan_chunk(pcm_io_context_t *io, uint32_t chunk_size, + pcm_sample_description_t *fmt, uint8_t *mapping) +{ + /* + * Although FDK encoder supports upto 5.1ch, we handle upto + * 8 channels here. 
+ */ + uint32_t i, mChannelLayoutTag, mChannelBitmap, mNumberChannelDescriptions; + uint32_t mask = 0; + const uint32_t nchannels = fmt->channels_per_frame; + uint8_t channels[9] = { 0 }; + uint8_t *index[8] = { 0 }; + const char *layout = 0; + + ENSURE(chunk_size >= 12); + TRY_IO(pcm_scanb(io, "LLL", &mChannelLayoutTag, &mChannelBitmap, + &mNumberChannelDescriptions) != 3); + + switch (mChannelLayoutTag) { + case kAudioChannelLayoutTag_UseChannelBitmap: + ENSURE(bitcount(mask) == nchannels); + TRY_IO(pcm_skip(io, chunk_size - 12)); + fmt->channel_mask = mChannelBitmap; + for (i = 0; i < nchannels; ++i) + mapping[i] = i; + return 0; + case kAudioChannelLayoutTag_UseChannelDescriptions: + ENSURE(mNumberChannelDescriptions == nchannels); + ENSURE(chunk_size >= 12 + nchannels * 20); + for (i = 0; i < mNumberChannelDescriptions; ++i) { + uint32_t mChannelLabel; + TRY_IO(pcm_read32be(io, &mChannelLabel)); + ENSURE(mChannelLabel && mChannelLabel <= 0xff); + channels[i] = mChannelLabel; + TRY_IO(pcm_skip(io, 16)); + } + TRY_IO(pcm_skip(io, chunk_size - 12 - nchannels * 20)); + apple_translate_channel_labels(channels, nchannels); + for (i = 0; i < nchannels; ++i) + if (channels[i] > kAudioChannelLabel_TopBackLeft) + goto FAIL; + break; + default: + ENSURE((mChannelLayoutTag & 0xffff) == nchannels); + TRY_IO(pcm_skip(io, chunk_size - 12)); + + switch (mChannelLayoutTag) { + /* 1ch */ + case kAudioChannelLayoutTag_Mono: + layout = "\x03"; break; + /* 1.1ch */ + case kAudioChannelLayoutTag_AC3_1_0_1: + layout = "\x03\x04"; break; + /* 2ch */ + case kAudioChannelLayoutTag_Stereo: + case kAudioChannelLayoutTag_MatrixStereo: + case kAudioChannelLayoutTag_Binaural: + layout = "\x01\x02"; break; + /* 2.1ch */ + case kAudioChannelLayoutTag_DVD_4: + layout = "\x01\x02\x04"; break; + /* 3ch */ + case kAudioChannelLayoutTag_MPEG_3_0_A: + layout = "\x01\x02\x03"; break; + case kAudioChannelLayoutTag_AC3_3_0: + layout = "\x01\x03\x02"; break; + case kAudioChannelLayoutTag_MPEG_3_0_B: + 
layout = "\x03\x01\x02"; break; + case kAudioChannelLayoutTag_ITU_2_1: + layout = "\x01\x02\x09"; break; + /* 3.1ch */ + case kAudioChannelLayoutTag_DVD_10: + layout = "\x01\x02\x03\x04"; break; + case kAudioChannelLayoutTag_AC3_3_0_1: + layout = "\x01\x03\x02\x04"; break; + case kAudioChannelLayoutTag_DVD_5: + layout = "\x01\x02\x04\x09"; break; + case kAudioChannelLayoutTag_AC3_2_1_1: + layout = "\x01\x02\x09\x04"; break; + /* 4ch */ + case kAudioChannelLayoutTag_Quadraphonic: + case kAudioChannelLayoutTag_ITU_2_2: + layout = "\x01\x02\x0A\x0B"; break; + case kAudioChannelLayoutTag_MPEG_4_0_A: + layout = "\x01\x02\x03\x09"; break; + case kAudioChannelLayoutTag_MPEG_4_0_B: + layout = "\x03\x01\x02\x09"; break; + case kAudioChannelLayoutTag_AC3_3_1: + layout = "\x01\x03\x02\x09"; break; + /* 4.1ch */ + case kAudioChannelLayoutTag_DVD_6: + layout = "\x01\x02\x04\x0A\x0B"; break; + case kAudioChannelLayoutTag_DVD_18: + layout = "\x01\x02\x0A\x0B\x04"; break; + case kAudioChannelLayoutTag_DVD_11: + layout = "\x01\x02\x03\x04\x09"; break; + case kAudioChannelLayoutTag_AC3_3_1_1: + layout = "\x01\x03\x02\x09\x04"; break; + /* 5ch */ + case kAudioChannelLayoutTag_MPEG_5_0_A: + layout = "\x01\x02\x03\x0A\x0B"; break; + case kAudioChannelLayoutTag_Pentagonal: + case kAudioChannelLayoutTag_MPEG_5_0_B: + layout = "\x01\x02\x0A\x0B\x03"; break; + case kAudioChannelLayoutTag_MPEG_5_0_C: + layout = "\x01\x03\x02\x0A\x0B"; break; + case kAudioChannelLayoutTag_MPEG_5_0_D: + layout = "\x03\x01\x02\x0A\x0B"; break; + /* 5.1ch */ + case kAudioChannelLayoutTag_MPEG_5_1_A: + layout = "\x01\x02\x03\x04\x0A\x0B"; break; + case kAudioChannelLayoutTag_MPEG_5_1_B: + layout = "\x01\x02\x0A\x0B\x03\x04"; break; + case kAudioChannelLayoutTag_MPEG_5_1_C: + layout = "\x01\x03\x02\x0A\x0B\x04"; break; + case kAudioChannelLayoutTag_MPEG_5_1_D: + layout = "\x03\x01\x02\x0A\x0B\x04"; break; + /* 6ch */ + case kAudioChannelLayoutTag_Hexagonal: + case kAudioChannelLayoutTag_AudioUnit_6_0: + layout = 
"\x01\x02\x0A\x0B\x03\x09"; break; + case kAudioChannelLayoutTag_AAC_6_0: + layout = "\x03\x01\x02\x0A\x0B\x09"; break; + /* 6.1ch */ + case kAudioChannelLayoutTag_MPEG_6_1_A: + layout = "\x01\x02\x03\x04\x0A\x0B\x09"; break; + case kAudioChannelLayoutTag_AAC_6_1: + layout = "\x03\x01\x02\x0A\x0B\x09\x04"; break; + /* 7ch */ + case kAudioChannelLayoutTag_AudioUnit_7_0: + layout = "\x01\x02\x0A\x0B\x03\x05\x06"; break; + case kAudioChannelLayoutTag_AudioUnit_7_0_Front: + layout = "\x01\x02\x0A\x0B\x03\x07\x08"; break; + case kAudioChannelLayoutTag_AAC_7_0: + layout = "\x03\x01\x02\x0A\x0B\x05\x06"; break; + /* 7.1ch */ + case kAudioChannelLayoutTag_MPEG_7_1_A: + layout = "\x01\x02\x03\x04\x0A\x0B\x07\x08"; break; + case kAudioChannelLayoutTag_MPEG_7_1_B: + layout = "\x03\x07\x08\x01\x02\x05\x06\x04"; break; + case kAudioChannelLayoutTag_MPEG_7_1_C: + layout = "\x01\x02\x03\x04\x0A\x0B\x05\x06"; break; + case kAudioChannelLayoutTag_Emagic_Default_7_1: + layout = "\x01\x02\x0A\x0B\x03\x04\x07\x08"; break; + /* 8ch */ + case kAudioChannelLayoutTag_Octagonal: + layout = "\x01\x02\x05\x06\x03\x09\x0A\x0B"; break; + case kAudioChannelLayoutTag_AAC_Octagonal: + layout = "\x03\x01\x02\x0A\x0B\x05\x06\x09"; break; + default: + goto FAIL; + } + strcpy((char*)channels, layout); + } + + for (i = 0; i < nchannels; ++i) + mask |= 1 << (channels[i] - 1); + fmt->channel_mask = mask; + ENSURE(bitcount(mask) == nchannels); + + for (i = 0; i < nchannels; ++i) + index[i] = channels + i; + qsort(index, nchannels, sizeof(char*), channel_compare); + for (i = 0; i < nchannels; ++i) + mapping[i] = index[i] - channels; + + return 0; +FAIL: + return -1; +} diff --git a/src/wav_reader.c b/src/wav_reader.c index 133854b..e69cba7 100644 --- a/src/wav_reader.c +++ b/src/wav_reader.c @@ -18,12 +18,7 @@ #define RIFF_FOURCC(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) -#define ENSURE(expr) \ - do { \ - if (!(expr)) goto FAIL;\ - } while (0) - -struct wav_reader_t { +typedef struct wav_reader_t { 
pcm_reader_vtbl_t *vtbl; pcm_sample_description_t sample_format; int64_t length; @@ -31,7 +26,7 @@ struct wav_reader_t { int32_t data_offset; int ignore_length; pcm_io_context_t io; -}; +} wav_reader_t; static const uint8_t WAV_GUID_PCM[] = { 1, 0, 0, 0, 0, 0, 0x10, 0, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71 diff --git a/src/wav_reader.h b/src/wav_reader.h index 225064a..2a69df1 100644 --- a/src/wav_reader.h +++ b/src/wav_reader.h @@ -8,8 +8,6 @@ #include "lpcm.h" #include "pcm_reader.h" -typedef struct wav_reader_t wav_reader_t; - pcm_reader_t *wav_open(pcm_io_context_t *io, int ignore_length); pcm_reader_t *raw_open(pcm_io_context_t *io, const pcm_sample_description_t *desc); -- 2.30.2