Add support for RTSP Mp4a-Latm #162

Closed
@@ -55,6 +55,7 @@ public final class RtpPayloadFormat {
private static final String RTP_MEDIA_PCMU = "PCMU";
private static final String RTP_MEDIA_VP8 = "VP8";
private static final String RTP_MEDIA_VP9 = "VP9";
public static final String RTP_MEDIA_MPEG4_AUDIO = "MP4A-LATM";

/** Returns whether the format of a {@link MediaDescription} is supported. */
public static boolean isFormatSupported(MediaDescription mediaDescription) {
@@ -66,6 +67,7 @@ public static boolean isFormatSupported(MediaDescription mediaDescription) {
case RTP_MEDIA_H263_2000:
case RTP_MEDIA_H264:
case RTP_MEDIA_H265:
case RTP_MEDIA_MPEG4_AUDIO:
case RTP_MEDIA_MPEG4_VIDEO:
case RTP_MEDIA_MPEG4_GENERIC:
case RTP_MEDIA_OPUS:
@@ -97,6 +99,7 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
case RTP_MEDIA_AMR_WB:
return MimeTypes.AUDIO_AMR_WB;
case RTP_MEDIA_MPEG4_GENERIC:
case RTP_MEDIA_MPEG4_AUDIO:
return MimeTypes.AUDIO_AAC;
case RTP_MEDIA_OPUS:
return MimeTypes.AUDIO_OPUS;
@@ -142,6 +145,7 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
public final Format format;
/** The format parameters, mapped from the SDP FMTP attribute (RFC2327 Page 22). */
public final ImmutableMap<String, String> fmtpParameters;
/** The RTP media encoding name from the RTPMAP attribute, for example MP4A-LATM. */
public final String mediaEncoding;

/**
* Creates a new instance.
@@ -154,12 +158,13 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
* empty if unset. The keys and values are specified in the RFCs for specific formats. For
* instance, RFC3640 Section 4.1 defines keys like profile-level-id and config.
* @param mediaEncoding The RTP media encoding name from the RTPMAP attribute, for example
*     MP4A-LATM.
*/
public RtpPayloadFormat(
Format format, int rtpPayloadType, int clockRate, Map<String, String> fmtpParameters) {
public RtpPayloadFormat(
Format format,
int rtpPayloadType,
int clockRate,
Map<String, String> fmtpParameters,
String mediaEncoding) {
this.rtpPayloadType = rtpPayloadType;
this.clockRate = clockRate;
this.format = format;
this.fmtpParameters = ImmutableMap.copyOf(fmtpParameters);
this.mediaEncoding = mediaEncoding;
}

@Override
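For context, a minimal sketch of how the widened constructor above would be invoked for an MP4A-LATM stream; the payload type, clock rate, and fmtp values below are illustrative, not taken from this change:

Format format =
    new Format.Builder()
        .setSampleMimeType(MimeTypes.AUDIO_AAC)
        .setChannelCount(2)
        .setSampleRate(44_100)
        .build();
RtpPayloadFormat latmPayloadFormat =
    new RtpPayloadFormat(
        format,
        /* rtpPayloadType= */ 96,
        /* clockRate= */ 44_100,
        /* fmtpParameters= */ ImmutableMap.of("cpresent", "0", "config", "40002420"),
        /* mediaEncoding= */ RtpPayloadFormat.RTP_MEDIA_MPEG4_AUDIO);

With cpresent=0 the StreamMuxConfig travels out-of-band in the config attribute, which is what the parsing added further down relies on.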
@@ -31,7 +31,9 @@
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.CodecSpecificDataUtil;
import androidx.media3.common.util.ParsableBitArray;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import androidx.media3.extractor.AacUtil;
@@ -53,6 +55,7 @@
private static final String PARAMETER_H265_SPROP_VPS = "sprop-vps";
private static final String PARAMETER_H265_SPROP_MAX_DON_DIFF = "sprop-max-don-diff";
private static final String PARAMETER_MP4V_CONFIG = "config";
private static final String PARAMETER_MP4A_C_PRESENT = "cpresent";

/** Prefix for the RFC6381 codecs string for AAC formats. */
private static final String AAC_CODECS_PREFIX = "mp4a.40.";
@@ -208,6 +211,21 @@ public int hashCode() {
case MimeTypes.AUDIO_AAC:
checkArgument(channelCount != C.INDEX_UNSET);
checkArgument(!fmtpParameters.isEmpty());
if (mediaEncoding.equals(RtpPayloadFormat.RTP_MEDIA_MPEG4_AUDIO)) {
// Only out-of-band StreamMuxConfig is supported, signalled by cpresent=0; in-band config
// (cpresent=1) would have to be parsed from the RTP payload itself.
boolean isConfigInBand = !"0".equals(fmtpParameters.get(PARAMETER_MP4A_C_PRESENT));
checkArgument(!isConfigInBand, "Only cpresent=0 (out-of-band StreamMuxConfig) is supported.");
@Nullable String configInput = fmtpParameters.get(PARAMETER_MP4V_CONFIG);
if (configInput != null && configInput.length() % 2 == 0) {
Pair<Integer, Integer> configParameters = getChannelCountAndSampleRate(configInput);
channelCount = configParameters.first;
clockRate = configParameters.second;
formatBuilder.setSampleRate(clockRate).setChannelCount(channelCount);
}
}
processAacFmtpAttribute(formatBuilder, fmtpParameters, channelCount, clockRate);
break;
case MimeTypes.AUDIO_AMR_NB:
@@ -267,7 +285,8 @@ public int hashCode() {
}

checkArgument(clockRate > 0);
return new RtpPayloadFormat(formatBuilder.build(), rtpPayloadType, clockRate, fmtpParameters);
return new RtpPayloadFormat(
formatBuilder.build(), rtpPayloadType, clockRate, fmtpParameters, mediaEncoding);
}

private static int inferChannelCount(int encodingParameter, String mimeType) {
@@ -300,6 +319,32 @@ private static void processAacFmtpAttribute(
AacUtil.buildAacLcAudioSpecificConfig(sampleRate, channelCount)));
}

/**
* Returns the channel count and sample rate parsed from an MPEG-4 Audio Stream Mux
* configuration (StreamMuxConfig), as a {@link Pair} of (channel count, sample rate).
*
* <p>The fmtp attribute {@code config} carries the hex-encoded StreamMuxConfig defined in
* ISO/IEC 14496-3 Chapter 1.7.3.
*/
private static Pair<Integer, Integer> getChannelCountAndSampleRate(String configInput) {
ParsableBitArray config = new ParsableBitArray(Util.getBytesFromHexString(configInput));
int audioMuxVersion = config.readBits(1);
checkArgument(audioMuxVersion == 0, "Only audioMuxVersion=0 is supported.");
checkArgument(config.readBits(1) == 1, "Only allStreamsSameTimeFraming=1 is supported.");
config.readBits(6); // numSubFrames.
checkArgument(config.readBits(4) == 0, "Only a single program is supported.");
checkArgument(config.readBits(3) == 0, "Only a single layer is supported.");
AacUtil.Config aacConfig;
try {
aacConfig = AacUtil.parseAudioSpecificConfig(config, /* forceReadToEnd= */ false);
} catch (ParserException e) {
throw new IllegalArgumentException(e);
}
return Pair.create(aacConfig.channelCount, aacConfig.sampleRateHz);
}
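As a quick sanity check on the bit layout read above, a minimal sketch using the same utilities; 40002420 is an illustrative StreamMuxConfig (audioMuxVersion=0, one subframe, one program, one layer, AAC-LC, 44.1 kHz, stereo), not a value taken from this PR, and the enclosing method is assumed to declare throws ParserException:

ParsableBitArray bits = new ParsableBitArray(Util.getBytesFromHexString("40002420"));
// audioMuxVersion (1) + allStreamsSameTimeFraming (1) + numSubFrames (6)
// + numProgram (4) + numLayer (3) = 15 bits precede the AudioSpecificConfig.
bits.skipBits(15);
AacUtil.Config aacConfig = AacUtil.parseAudioSpecificConfig(bits, /* forceReadToEnd= */ false);
// aacConfig.sampleRateHz == 44_100 and aacConfig.channelCount == 2.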

private static void processMPEG4FmtpAttribute(
Format.Builder formatBuilder, ImmutableMap<String, String> fmtpAttributes) {
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4V_CONFIG);
@@ -35,7 +35,11 @@ public RtpPayloadReader createPayloadReader(RtpPayloadFormat payloadFormat) {
case MimeTypes.AUDIO_AC3:
return new RtpAc3Reader(payloadFormat);
case MimeTypes.AUDIO_AAC:
return new RtpAacReader(payloadFormat);
if (payloadFormat.mediaEncoding.equals(RtpPayloadFormat.RTP_MEDIA_MPEG4_AUDIO)) {
return new RtpMp4aReader(payloadFormat);
} else {
return new RtpAacReader(payloadFormat);
}
case MimeTypes.AUDIO_AMR_NB:
case MimeTypes.AUDIO_AMR_WB:
return new RtpAmrReader(payloadFormat);
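A minimal sketch of the resulting dispatch, assuming the factory shown above is media3's DefaultRtpPayloadReaderFactory (the class name is not visible in this hunk) and reusing latmPayloadFormat from the earlier sketch:

RtpPayloadReader reader =
    new DefaultRtpPayloadReaderFactory().createPayloadReader(latmPayloadFormat);
// mediaEncoding "MP4A-LATM" now yields an RtpMp4aReader, while an RFC 3640
// "MPEG4-GENERIC" stream keeps getting an RtpAacReader, as before.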
@@ -0,0 +1,182 @@
/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.rtsp.reader;

import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.castNonNull;
import static androidx.media3.exoplayer.rtsp.reader.RtpReaderUtils.toSampleTimeUs;

import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.ParsableBitArray;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import androidx.media3.exoplayer.rtsp.RtpPacket;
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
import androidx.media3.extractor.ExtractorOutput;
import androidx.media3.extractor.TrackOutput;

import com.google.common.collect.ImmutableMap;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/**
* Parses an MP4A-LATM byte stream carried on RTP packets and extracts access units. Refer to
* RFC 3016 for more details.
*/
@UnstableApi
/* package */ final class RtpMp4aReader implements RtpPayloadReader {
private static final String TAG = "RtpMp4aReader";

private static final String PARAMETER_MP4A_CONFIG = "config";

private final RtpPayloadFormat payloadFormat;
private @MonotonicNonNull TrackOutput trackOutput;
private long firstReceivedTimestamp;
private int previousSequenceNumber;
/** The combined size of a sample that is fragmented into multiple subframes. */
private int fragmentedSampleSizeBytes;
private long startTimeOffsetUs;
private long sampleTimeUsOfFragmentedSample;
private int numberOfSubframes;

/** Creates an instance. */
public RtpMp4aReader(RtpPayloadFormat payloadFormat) {
this.payloadFormat = payloadFormat;
firstReceivedTimestamp = C.TIME_UNSET;
previousSequenceNumber = C.INDEX_UNSET;
fragmentedSampleSizeBytes = 0;
// The start time offset must be 0 until the first seek.
startTimeOffsetUs = 0;
sampleTimeUsOfFragmentedSample = C.TIME_UNSET;
}

@Override
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_AUDIO);
castNonNull(trackOutput).format(payloadFormat.format);
}

@Override
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {
checkState(firstReceivedTimestamp == C.TIME_UNSET);
firstReceivedTimestamp = timestamp;
try {
numberOfSubframes = getNumOfSubframesFromMpeg4AudioConfig(payloadFormat.fmtpParameters);
} catch (ParserException e) {
throw new IllegalArgumentException(e);
}
}

@Override
public void consume(
ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
throws ParserException {
checkStateNotNull(trackOutput);

int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
// Packets were lost before this one (the sequence number jumped ahead of the expected one),
// so flush the metadata for the sample fragments received so far.
if (fragmentedSampleSizeBytes > 0 && expectedSequenceNumber < sequenceNumber) {
outputSampleMetadataForFragmentedPackets();
}
int audioPayloadOffset = 0;
for (int subFrame = 0; subFrame < numberOfSubframes; subFrame++) {
int sampleLength = 0;

/*
* Implements PayloadLengthInfo() in ISO/IEC 14496-3 Chapter 1.7.3.1; only one program and one
* layer are supported. Each subframe length is coded as a sequence of bytes that are summed
* up: a 0xFF byte means the length continues in the next byte, and the first byte smaller
* than 0xFF terminates it.
*/
for (; audioPayloadOffset < data.bytesLeft(); audioPayloadOffset++) {
int payloadMuxLength = data.readUnsignedByte();
sampleLength += payloadMuxLength;
if (payloadMuxLength != 0xff) {
break;
}
}
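// Worked example (illustrative): a 300-byte subframe is length-coded as the two bytes
// 0xFF 0x2D, because 255 + 45 = 300; the loop above stops at the first byte that is not 0xFF.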

// Write the audio sample
trackOutput.sampleData(data, sampleLength);
audioPayloadOffset += sampleLength;
fragmentedSampleSizeBytes += sampleLength;
}
sampleTimeUsOfFragmentedSample = toSampleTimeUs(startTimeOffsetUs, timestamp,
firstReceivedTimestamp, payloadFormat.clockRate);
if (rtpMarker) {
outputSampleMetadataForFragmentedPackets();
}
previousSequenceNumber = sequenceNumber;
}

@Override
public void seek(long nextRtpTimestamp, long timeUs) {
firstReceivedTimestamp = nextRtpTimestamp;
fragmentedSampleSizeBytes = 0;
startTimeOffsetUs = timeUs;
}

// Internal methods.

/**
* Parses the MPEG-4 Audio Stream Mux configuration (StreamMuxConfig), as defined in ISO/IEC
* 14496-3, from the hex string carried in the {@code config} FMTP attribute, and returns the
* number of subframes per RTP payload.
*
* @param fmtpAttributes The format parameters, mapped from the SDP FMTP attribute.
* @return The number of subframes per RTP payload.
* @throws ParserException If the configuration cannot be parsed because its audioMuxVersion is
*     unsupported.
*/
private static int getNumOfSubframesFromMpeg4AudioConfig(
ImmutableMap<String, String> fmtpAttributes) throws ParserException {
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4A_CONFIG);
int numberOfSubframes = 0;
if (configInput != null && configInput.length() % 2 == 0) {
byte[] configBuffer = Util.getBytesFromHexString(configInput);
ParsableBitArray scratchBits = new ParsableBitArray(configBuffer);
int audioMuxVersion = scratchBits.readBits(1);
if (audioMuxVersion == 0) {
checkArgument(scratchBits.readBits(1) == 1, "Invalid allStreamsSameTimeFraming.");
numberOfSubframes = scratchBits.readBits(6);
checkArgument(scratchBits.readBits(4) == 0, "Invalid numProgram.");
checkArgument(scratchBits.readBits(3) == 0, "Invalid numLayer.");
} else {
throw ParserException.createForMalformedDataOfUnknownType(
"unsupported audio mux version: " + audioMuxVersion, null);
}
}
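// With the illustrative config value 40002420, the numSubFrames bits are 000000, so this
// method returns 0 + 1 = 1 subframe per RTP payload.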
return numberOfSubframes + 1;
}

/**
* Outputs sample metadata.
*
* <p>Call this method only when the end of an MPEG-4 sample is received (RTP marker set) or
* packet loss is detected.
*/
private void outputSampleMetadataForFragmentedPackets() {
trackOutput.sampleMetadata(
sampleTimeUsOfFragmentedSample,
C.BUFFER_FLAG_KEY_FRAME,
fragmentedSampleSizeBytes,
/* offset= */ 0,
/* cryptoData= */ null);
fragmentedSampleSizeBytes = 0;
sampleTimeUsOfFragmentedSample = C.TIME_UNSET;
}
}