Add --sbr-ratio option to support AACENC_SBR_RATIO, which first appeared in libFDK 3.4.12

[fdkaac.git] / src / main.c
diff --git a/src/main.c b/src/main.c

index 0d076907631df6f8b76c5589350594120db7988e..8521a33ebd9791b174356c8cf24386f43c6205ea 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -132,7 +132,14 @@ PROGNAME " %s\n"
  " -a, --afterburner <n>         Afterburner\n"
  "                                 0: Off\n"
  "                                 1: On(default)\n"
-" -L, --lowdelay-sbr            Enable ELD-SBR (AAC ELD only)\n"
+" -L, --lowdelay-sbr <-1|0|1>   Configure SBR activity on AAC ELD\n"
+"                                -1: Use ELD SBR auto configurator\n"
+"                                 0: Disable SBR on ELD (default)\n"
+"                                 1: Enable SBR on ELD\n"
+" -s, --sbr-ratio <0|1|2>       Controls activation of downsampled SBR\n"
+"                                 0: Use lib default (default)\n"
+"                                 1: downsampled SBR (default for ELD+SBR)\n"
+"                                 2: dual-rate SBR (default for HE-AAC)\n"
  " -f, --transport-format <n>    Transport format\n"
  "                                 0: RAW (default, muxed into M4A)\n"
  "                                 1: ADIF\n"
@@ -228,7 +235,7 @@ static
  int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
  {
      int ch;
-    unsigned n;
+    int n;
  
  #define OPT_INCLUDE_SBR_DELAY    M4AF_FOURCC('s','d','l','y')
  #define OPT_MOOV_BEFORE_MDAT     M4AF_FOURCC('m','o','o','v')
@@ -247,7 +254,8 @@ int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
          { "bitrate-mode",     required_argument, 0, 'm' },
          { "bandwidth",        required_argument, 0, 'w' },
          { "afterburner",      required_argument, 0, 'a' },
-        { "lowdelay-sbr",     no_argument,       0, 'L' },
+        { "lowdelay-sbr",     required_argument, 0, 'L' },
+        { "sbr-ratio",        required_argument, 0, 's' },
          { "transport-format", required_argument, 0, 'f' },
          { "adts-crc-check",   no_argument,       0, 'C' },
          { "header-period",    required_argument, 0, 'P' },
@@ -325,7 +333,18 @@ int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
              params->afterburner = n;
              break;
          case 'L':
-            params->lowdelay_sbr = 1;
+            if (sscanf(optarg, "%d", &n) != 1 || n < -1 || n > 1) {
+                fprintf(stderr, "invalid arg for lowdelay-sbr\n");
+                return -1;
+            }
+            params->lowdelay_sbr = n;
+            break;
+        case 's':
+            if (sscanf(optarg, "%u", &n) != 1 || n > 2) {
+                fprintf(stderr, "invalid arg for sbr-ratio\n");
+                return -1;
+            }
+            params->sbr_ratio = n;
              break;
          case 'f':
              if (sscanf(optarg, "%u", &n) != 1) {
@@ -472,16 +491,15 @@ int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
  };
  
  static
-int write_sample(FILE *ofp, m4af_ctx_t *m4af,
-                 const void *data, uint32_t size, uint32_t duration)
+int write_sample(FILE *ofp, m4af_ctx_t *m4af, aacenc_frame_t *frame)
  {
      if (!m4af) {
-        fwrite(data, 1, size, ofp);
+        fwrite(frame->data, 1, frame->size, ofp);
          if (ferror(ofp)) {
              fprintf(stderr, "ERROR: fwrite(): %s\n", strerror(errno));
              return -1;
          }
-    } else if (m4af_write_sample(m4af, 0, data, size, duration) < 0) {
+    } else if (m4af_write_sample(m4af, 0, frame->data, frame->size, 0) < 0) {
          fprintf(stderr, "ERROR: failed to write m4a sample\n");
          return -1;
      }
@@ -489,51 +507,76 @@ int write_sample(FILE *ofp, m4af_ctx_t *m4af,
  }
  
  static
-int encode(pcm_reader_t *reader, HANDLE_AACENCODER encoder,
-           uint32_t frame_length, FILE *ofp, m4af_ctx_t *m4af,
-           int show_progress)
+int encode(aacenc_param_ex_t *params, pcm_reader_t *reader,
+           HANDLE_AACENCODER encoder, uint32_t frame_length, 
+           m4af_ctx_t *m4af)
  {
-    int16_t *ibuf = 0;
-    uint8_t *obuf = 0;
-    uint32_t olen;
-    uint32_t osize = 0;
+    int16_t *ibuf = 0, *ip;
+    aacenc_frame_t obuf[2] = {{ 0 }}, *obp;
+    unsigned flip = 0;
      int nread = 1;
-    int consumed;
      int rc = -1;
-    int frames_written = 0;
+    int remaining, consumed;
+    int frames_written = 0, encoded = 0;
      aacenc_progress_t progress = { 0 };
      const pcm_sample_description_t *fmt = pcm_get_format(reader);
  
      ibuf = malloc(frame_length * fmt->bytes_per_frame);
      aacenc_progress_init(&progress, pcm_get_length(reader), fmt->sample_rate);
-    do {
+
+    for (;;) {
+        /*
+         * Since we delay the write, we cannot just exit loop when interrupted.
+         * Instead, we regard it as EOF.
+         */
          if (g_interrupted)
              nread = 0;
-        else if (nread) {
+        if (nread > 0) {
              if ((nread = pcm_read_frames(reader, ibuf, frame_length)) < 0) {
                  fprintf(stderr, "ERROR: read failed\n");
                  goto END;
              }
-            if (show_progress)
+            if (!params->silent)
                  aacenc_progress_update(&progress, pcm_get_position(reader),
                                         fmt->sample_rate * 2);
          }
-        if ((consumed = aac_encode_frame(encoder, fmt, ibuf, nread,
-                                         &obuf, &olen, &osize)) < 0)
-            goto END;
-        if (olen > 0) {
-            if (write_sample(ofp, m4af, obuf, olen, frame_length) < 0)
+        ip = ibuf;
+        remaining = nread;
+        do {
+            obp = &obuf[flip];
+            consumed = aac_encode_frame(encoder, fmt, ip, remaining, obp);
+            if (consumed < 0) goto END;
+            if (consumed == 0 && obp->size == 0) goto DONE;
+            if (obp->size == 0) break;
+
+            remaining -= consumed;
+            ip += consumed * fmt->channels_per_frame;
+            flip ^= 1;
+            /*
+             * As we pad 1 frame at beginning and ending by our extrapolator,
+             * we want to drop them.
+             * We delay output by 1 frame by double buffering, and discard
+             * second frame and final frame from the encoder.
+             * Since sbr_header is included in the first frame (in case of
+             * SBR), we cannot discard first frame. So we pick second instead.
+             */
+            ++encoded;
+            if (encoded == 1 || encoded == 3)
+                continue;
+            obp = &obuf[flip];
+            if (write_sample(params->output_fp, m4af, obp) < 0)
                  goto END;
              ++frames_written;
-        }
-    } while (nread > 0 || olen > 0);
-
-    if (show_progress)
+        } while (remaining > 0);
+    }
+DONE:
+    if (!params->silent)
          aacenc_progress_finish(&progress, pcm_get_position(reader));
      rc = frames_written;
  END:
      if (ibuf) free(ibuf);
-    if (obuf) free(obuf);
+    if (obuf[0].data) free(obuf[0].data);
+    if (obuf[1].data) free(obuf[1].data);
      return rc;
  }
  
@@ -543,19 +586,11 @@ void put_tool_tag(m4af_ctx_t *m4af, const aacenc_param_ex_t *params,
  {
      char tool_info[256];
      char *p = tool_info;
-    LIB_INFO *lib_info = 0;
+    LIB_INFO lib_info;
  
      p += sprintf(p, PROGNAME " %s, ", fdkaac_version);
-
-    lib_info = calloc(FDK_MODULE_LAST, sizeof(LIB_INFO));
-    if (aacEncGetLibInfo(lib_info) == AACENC_OK) {
-        int i;
-        for (i = 0; i < FDK_MODULE_LAST; ++i)
-            if (lib_info[i].module_id == FDK_AACENC)
-                break;
-        p += sprintf(p, "libfdk-aac %s, ", lib_info[i].versionStr);
-    }
-    free(lib_info);
+    aacenc_get_lib_info(&lib_info);
+    p += sprintf(p, "libfdk-aac %s, ", lib_info.versionStr);
      if (params->bitrate_mode)
          sprintf(p, "VBR mode %d", params->bitrate_mode);
      else
@@ -651,7 +686,7 @@ int parse_raw_spec(const char *spec, pcm_sample_description_t *desc)
  static pcm_io_vtbl_t pcm_io_vtbl = {
      read_callback, seek_callback, tell_callback
  };
-static pcm_io_vtbl_t pcm_io_vtbl_noseek = { read_callback, 0, 0 };
+static pcm_io_vtbl_t pcm_io_vtbl_noseek = { read_callback, 0, tell_callback };
  
  static
  pcm_reader_t *open_input(aacenc_param_ex_t *params)
@@ -709,10 +744,13 @@ pcm_reader_t *open_input(aacenc_param_ex_t *params)
              }
              break;
          default:
+            fprintf(stderr, "ERROR: unsupported input file\n");
              goto END;
          }
      }
-    return pcm_open_sint16_converter(reader);
+    if ((reader = pcm_open_sint16_converter(reader)) != 0)
+        reader = extrapolater_open(reader);
+    return reader;
  END:
      return 0;
  }
@@ -729,11 +767,13 @@ int main(int argc, char **argv)
      pcm_reader_t *reader = 0;
      HANDLE_AACENCODER encoder = 0;
      AACENC_InfoStruct aacinfo = { 0 };
+    LIB_INFO lib_info = { 0 };
      m4af_ctx_t *m4af = 0;
      const pcm_sample_description_t *sample_format;
      int downsampled_timescale = 0;
      int frame_count = 0;
      int sbr_mode = 0;
+    unsigned scale_shift = 0;
  
      setlocale(LC_CTYPE, "");
      setbuf(stderr, 0);
@@ -755,7 +795,16 @@ int main(int argc, char **argv)
       * way in MPEG4 part3 spec, and seems the only way supported by iTunes.
       * Since FDK library does not support it, we have to do it on our side.
       */
+    sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)&params);
+    if (sbr_mode && !aacenc_is_sbr_ratio_available()) {
+        fprintf(stderr, "WARNING: Only dual-rate SBR is available "
+                        "for this version\n");
+        params.sbr_ratio = 2;
+    }
+    scale_shift = aacenc_is_dual_rate_sbr((aacenc_param_t*)&params);
      params.sbr_signaling = (params.transport_format == TT_MP4_LOAS) ? 2 : 0;
+    if (sbr_mode && !scale_shift)
+        params.sbr_signaling = 2;
  
      if (aacenc_init(&encoder, (aacenc_param_t*)&params, sample_format,
                      &aacinfo) < 0)
@@ -773,15 +822,13 @@ int main(int argc, char **argv)
          goto END;
      }
      handle_signals();
-    sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)&params);
+
      if (!params.transport_format) {
          uint32_t scale;
          uint8_t mp4asc[32];
          uint32_t ascsize = sizeof(mp4asc);
          unsigned framelen = aacinfo.frameLength;
-        if (sbr_mode)
-            downsampled_timescale = 1;
-        scale = sample_format->sample_rate >> downsampled_timescale;
+        scale = sample_format->sample_rate >> scale_shift;
          if ((m4af = m4af_create(M4AF_CODEC_MP4A, scale, &m4af_io,
                                  params.output_fp)) < 0)
              goto END;
@@ -789,13 +836,22 @@ int main(int argc, char **argv)
                        aacinfo.confSize, mp4asc, &ascsize);
          m4af_set_decoder_specific_info(m4af, 0, mp4asc, ascsize);
          m4af_set_fixed_frame_duration(m4af, 0,
-                                      framelen >> downsampled_timescale);
+                                      framelen >> scale_shift);
          m4af_set_vbr_mode(m4af, 0, params.bitrate_mode);
          m4af_set_priming_mode(m4af, params.gapless_mode + 1);
          m4af_begin_write(m4af);
      }
-    frame_count = encode(reader, encoder, aacinfo.frameLength,
-                         params.output_fp, m4af, !params.silent);
+    if (scale_shift && (aacinfo.encoderDelay & 1)) {
+        /*
+         * Since odd delay cannot be exactly expressed in downsampled scale,
+         * we push one zero frame to the encoder here, to make delay even
+         */
+        int16_t zero[8] = { 0 };
+        aacenc_frame_t frame = { 0 };
+        aac_encode_frame(encoder, sample_format, zero, 1, &frame);
+        free(frame.data);
+    }
+    frame_count = encode(&params, reader, encoder, aacinfo.frameLength, m4af);
      if (frame_count < 0)
          goto END;
      if (m4af) {
@@ -805,12 +861,11 @@ int main(int argc, char **argv)
  
          if (sbr_mode && params.profile != AOT_ER_AAC_ELD &&
              !params.include_sbr_delay)
-            delay -= 481 << 1;
-        if (sbr_mode && (delay & 1))
+            delay -= 481 << scale_shift;
+        if (scale_shift && (delay & 1))
              ++delay;
          padding = frame_count * aacinfo.frameLength - frames_read - delay;
-        m4af_set_priming(m4af, 0, delay >> downsampled_timescale,
-                         padding >> downsampled_timescale);
+        m4af_set_priming(m4af, 0, delay >> scale_shift, padding >> scale_shift);
          if (finalize_m4a(m4af, &params, encoder) < 0)
              goto END;
      }