diff --git a/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java b/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java
index b83620df38..821a7a2ba6 100644
--- a/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java
+++ b/libraries/common/src/main/java/androidx/media3/common/util/CodecSpecificDataUtil.java
@@ -15,6 +15,8 @@
  */
 package androidx.media3.common.util;

+import static androidx.media3.common.util.Assertions.checkArgument;
+
 import android.util.Pair;
 import androidx.annotation.Nullable;
 import androidx.media3.common.C;
@@ -31,6 +33,12 @@ public final class CodecSpecificDataUtil {
   private static final String[] HEVC_GENERAL_PROFILE_SPACE_STRINGS =
       new String[] {"", "A", "B", "C"};

+  // MP4V-ES
+  private static final int VISUAL_OBJECT_LAYER = 1;
+  private static final int VISUAL_OBJECT_LAYER_START = 0x20;
+  private static final int EXTENDED_PAR = 0x0F;
+  private static final int RECTANGULAR = 0x00;
+
   /**
    * Parses an ALAC AudioSpecificConfig (i.e. an
    * ALACSpecificConfig).
@@ -72,6 +80,90 @@ public static boolean parseCea708InitializationData(List<byte[]> initializationD
         && initializationData.get(0)[0] == 1;
   }

+  /**
+   * Parses an MPEG-4 Visual configuration information, as defined in ISO/IEC14496-2.
+   *
+   * @param videoSpecificConfig A byte array containing the MPEG-4 Visual configuration information
+   *     to parse.
+   * @return A pair consisting of the width and the height.
+   * @throws IllegalArgumentException If no video object layer start code is found, if the
+   *     video object layer shape is not rectangular, or if an expected marker bit is not set.
+   */
+  public static Pair<Integer, Integer> getVideoResolutionFromMpeg4VideoConfig(
+      byte[] videoSpecificConfig) {
+    // Scan for the video object layer start code: the 00 00 01 prefix followed by a byte whose
+    // upper nibble is 0x2.
+    int offset = 0;
+    boolean foundVOL = false;
+    ParsableByteArray scratchBytes = new ParsableByteArray(videoSpecificConfig);
+    for (; offset + 3 < videoSpecificConfig.length; offset++) {
+      scratchBytes.setPosition(offset);
+      if (scratchBytes.readUnsignedInt24() == VISUAL_OBJECT_LAYER
+          && (videoSpecificConfig[offset + 3] & 0xf0) == VISUAL_OBJECT_LAYER_START) {
+        foundVOL = true;
+        break;
+      }
+    }
+
+    checkArgument(foundVOL, "Invalid input: VOL not found.");
+
+    ParsableBitArray scratchBits = new ParsableBitArray(videoSpecificConfig);
+    // Skip the start code from the bitstream.
+    scratchBits.skipBits((offset + 4) * 8);
+    scratchBits.skipBits(1); // random_accessible_vol
+    scratchBits.skipBits(8); // video_object_type_indication
+
+    if (scratchBits.readBit()) { // object_layer_identifier
+      scratchBits.skipBits(4); // video_object_layer_verid
+      scratchBits.skipBits(3); // video_object_layer_priority
+    }
+
+    int aspectRatioInfo = scratchBits.readBits(4);
+    if (aspectRatioInfo == EXTENDED_PAR) {
+      scratchBits.skipBits(8); // par_width
+      scratchBits.skipBits(8); // par_height
+    }
+
+    if (scratchBits.readBit()) { // vol_control_parameters
+      scratchBits.skipBits(2); // chroma_format
+      scratchBits.skipBits(1); // low_delay
+      if (scratchBits.readBit()) { // vbv_parameters
+        scratchBits.skipBits(79);
+      }
+    }
+
+    int videoObjectLayerShape = scratchBits.readBits(2);
+    checkArgument(
+        videoObjectLayerShape == RECTANGULAR, "Only supports rectangular video object layer shape");
+
+    checkArgument(scratchBits.readBit()); // marker_bit
+    int vopTimeIncrementResolution = scratchBits.readBits(16);
+    checkArgument(scratchBits.readBit()); // marker_bit
+
+    if (scratchBits.readBit()) { // fixed_vop_rate
+      checkArgument(vopTimeIncrementResolution > 0);
+      // The loop below counts the bits needed to represent vop_time_increment_resolution - 1,
+      // which is the width used here for fixed_vop_time_increment.
+      vopTimeIncrementResolution--;
+      int numBitsToSkip = 0;
+      while (vopTimeIncrementResolution > 0) {
+        numBitsToSkip++;
+        vopTimeIncrementResolution >>= 1;
+      }
+      scratchBits.skipBits(numBitsToSkip); // fixed_vop_time_increment
+    }
+
+    checkArgument(scratchBits.readBit()); // marker_bit
+    int videoObjectLayerWidth = scratchBits.readBits(13);
+    checkArgument(scratchBits.readBit()); // marker_bit
+    int videoObjectLayerHeight = scratchBits.readBits(13);
+    checkArgument(scratchBits.readBit()); // marker_bit
+
+    scratchBits.skipBits(1); // interlaced
+
+    return Pair.create(videoObjectLayerWidth, videoObjectLayerHeight);
+  }
+
   /**
    * Builds an RFC 6381 AVC codec string using the provided parameters.
    *
diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtpPayloadFormat.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtpPayloadFormat.java
index 297353167b..353b893f15 100644
--- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtpPayloadFormat.java
+++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtpPayloadFormat.java
@@ -38,6 +38,7 @@ public final class RtpPayloadFormat {

   private static final String RTP_MEDIA_AC3 = "AC3";
   private static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
+  private static final String RTP_MEDIA_MPEG4_VIDEO = "MP4V-ES";
   private static final String RTP_MEDIA_H264 = "H264";
   private static final String RTP_MEDIA_H265 = "H265";

@@ -47,6 +48,7 @@ public static boolean isFormatSupported(MediaDescription mediaDescription) {
       case RTP_MEDIA_AC3:
       case RTP_MEDIA_H264:
       case RTP_MEDIA_H265:
+      case RTP_MEDIA_MPEG4_VIDEO:
       case RTP_MEDIA_MPEG4_GENERIC:
         return true;
       default:
@@ -69,6 +71,8 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
         return MimeTypes.VIDEO_H264;
       case RTP_MEDIA_H265:
         return MimeTypes.VIDEO_H265;
+      case RTP_MEDIA_MPEG4_VIDEO:
+        return MimeTypes.VIDEO_MP4V;
       case RTP_MEDIA_MPEG4_GENERIC:
         return MimeTypes.AUDIO_AAC;
       default:
diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java
index 7547f1ea18..f4b10981ca 100644
--- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java
+++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java
@@ -25,6 +25,7 @@

 import android.net.Uri;
 import android.util.Base64;
+import android.util.Pair;
 import androidx.annotation.Nullable;
 import androidx.annotation.VisibleForTesting;
 import androidx.media3.common.C;
@@ -48,11 +49,14 @@
   private static final String PARAMETER_H265_SPROP_PPS = "sprop-pps";
   private static final String PARAMETER_H265_SPROP_VPS = "sprop-vps";
   private static final String PARAMETER_H265_SPROP_MAX_DON_DIFF = "sprop-max-don-diff";
+  private static final String PARAMETER_MP4V_CONFIG = "config";

   /** Prefix for the RFC6381 codecs string for AAC formats. */
   private static final String AAC_CODECS_PREFIX = "mp4a.40.";
   /** Prefix for the RFC6381 codecs string for AVC formats. */
   private static final String H264_CODECS_PREFIX = "avc1.";
+  /** Prefix for the RFC6416 codecs string for MPEG4V-ES formats. */
+  private static final String MPEG4_CODECS_PREFIX = "mp4v.";

   private static final String GENERIC_CONTROL_ATTR = "*";

@@ -121,6 +125,10 @@ public int hashCode() {
         checkArgument(!fmtpParameters.isEmpty());
         processAacFmtpAttribute(formatBuilder, fmtpParameters, channelCount, clockRate);
         break;
+      case MimeTypes.VIDEO_MP4V:
+        checkArgument(!fmtpParameters.isEmpty());
+        processMPEG4FmtpAttribute(formatBuilder, fmtpParameters);
+        break;
       case MimeTypes.VIDEO_H264:
         checkArgument(!fmtpParameters.isEmpty());
         processH264FmtpAttribute(formatBuilder, fmtpParameters);
@@ -169,6 +177,23 @@ private static void processAacFmtpAttribute(
             AacUtil.buildAacLcAudioSpecificConfig(sampleRate, channelCount)));
   }

+  private static void processMPEG4FmtpAttribute(
+      Format.Builder formatBuilder, ImmutableMap<String, String> fmtpAttributes) {
+    @Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4V_CONFIG);
+    if (configInput != null) {
+      byte[] configBuffer = Util.getBytesFromHexString(configInput);
+      formatBuilder.setInitializationData(ImmutableList.of(configBuffer));
+      Pair<Integer, Integer> resolution =
+          CodecSpecificDataUtil.getVideoResolutionFromMpeg4VideoConfig(configBuffer);
+      formatBuilder.setWidth(resolution.first).setHeight(resolution.second);
+    } else {
+      // set the default width and height
+      formatBuilder.setWidth(352).setHeight(288);
+    }
+    @Nullable String profileLevel = fmtpAttributes.get(PARAMETER_PROFILE_LEVEL_ID);
+    formatBuilder.setCodecs(MPEG4_CODECS_PREFIX + (profileLevel == null ? "1" : profileLevel));
+  }
+
   /** Returns H264/H265 initialization data from the RTP parameter set. */
   private static byte[] getInitializationDataFromParameterSet(String parameterSet) {
     byte[] decodedParameterNalData = Base64.decode(parameterSet, Base64.DEFAULT);
diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/DefaultRtpPayloadReaderFactory.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/DefaultRtpPayloadReaderFactory.java
index 888939b7e8..d2fc4f6dea 100644
--- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/DefaultRtpPayloadReaderFactory.java
+++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/DefaultRtpPayloadReaderFactory.java
@@ -40,6 +40,8 @@ public RtpPayloadReader createPayloadReader(RtpPayloadFormat payloadFormat) {
         return new RtpH264Reader(payloadFormat);
       case MimeTypes.VIDEO_H265:
         return new RtpH265Reader(payloadFormat);
+      case MimeTypes.VIDEO_MP4V:
+        return new RtpMPEG4Reader(payloadFormat);
       default:
         // No supported reader, returning null.
     }
diff --git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java
new file mode 100644
index 0000000000..82556057c6
--- /dev/null
+++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/RtpMPEG4Reader.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package androidx.media3.exoplayer.rtsp.reader;
+
+import static androidx.media3.common.util.Assertions.checkStateNotNull;
+import static androidx.media3.common.util.Util.castNonNull;
+
+import androidx.media3.common.C;
+import androidx.media3.common.ParserException;
+import androidx.media3.common.util.Log;
+import androidx.media3.common.util.ParsableByteArray;
+import androidx.media3.common.util.Util;
+import androidx.media3.exoplayer.rtsp.RtpPacket;
+import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
+import androidx.media3.extractor.ExtractorOutput;
+import androidx.media3.extractor.TrackOutput;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+
+/**
+ * Parses an MPEG4 byte stream carried on RTP packets, and extracts MPEG4 Access Units. Refer to
+ * RFC6416 for more details.
+ */
+/* package */ final class RtpMPEG4Reader implements RtpPayloadReader {
+  private static final String TAG = "RtpMPEG4Reader";
+
+  private static final long MEDIA_CLOCK_FREQUENCY = 90_000;
+
+  /** VOP unit type. */
+  private static final int I_VOP = 0;
+
+  private final RtpPayloadFormat payloadFormat;
+  private @MonotonicNonNull TrackOutput trackOutput;
+  @C.BufferFlags private int bufferFlags;
+  private long firstReceivedTimestamp;
+  private int previousSequenceNumber;
+  private long startTimeOffsetUs;
+  private int sampleLength;
+
+  /** Creates an instance. */
+  public RtpMPEG4Reader(RtpPayloadFormat payloadFormat) {
+    this.payloadFormat = payloadFormat;
+    firstReceivedTimestamp = C.TIME_UNSET;
+    previousSequenceNumber = C.INDEX_UNSET;
+    sampleLength = 0;
+  }
+
+  @Override
+  public void createTracks(ExtractorOutput extractorOutput, int trackId) {
+    trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
+    castNonNull(trackOutput).format(payloadFormat.format);
+  }
+
+  @Override
+  public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {}
+
+  @Override
+  public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
+      throws ParserException {
+    checkStateNotNull(trackOutput);
+    // Warn about gaps in the sequence, but keep consuming. Returning here without recording the
+    // sequence number would make every later packet mismatch as well, stalling the track.
+    if (previousSequenceNumber != C.INDEX_UNSET) {
+      int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
+      if (sequenceNumber != expectedSequenceNumber) {
+        Log.w(
+            TAG,
+            Util.formatInvariant(
+                "Received RTP packet with unexpected sequence number. Expected: %d; received: %d.",
+                expectedSequenceNumber, sequenceNumber));
+      }
+    }
+
+    // The first packet of a sample determines the buffer flags. Inspect the payload before it is
+    // handed to the track output.
+    int limit = data.bytesLeft();
+    if (sampleLength == 0) {
+      bufferFlags = getBufferFlagsFromVop(data);
+    }
+    trackOutput.sampleData(data, limit);
+    sampleLength += limit;
+
+    // Marker (M) bit: The marker bit is set to 1 to indicate the last RTP
+    // packet(or only RTP packet) of a VOP. When multiple VOPs are carried
+    // in the same RTP packet, the marker bit is set to 1.
+    if (rtpMarker) {
+      if (firstReceivedTimestamp == C.TIME_UNSET) {
+        firstReceivedTimestamp = timestamp;
+      }
+
+      long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp);
+      trackOutput.sampleMetadata(timeUs, bufferFlags, sampleLength, 0, null);
+      sampleLength = 0;
+    }
+    previousSequenceNumber = sequenceNumber;
+  }
+
+  @Override
+  public void seek(long nextRtpTimestamp, long timeUs) {
+    firstReceivedTimestamp = nextRtpTimestamp;
+    startTimeOffsetUs = timeUs;
+    sampleLength = 0;
+  }
+
+  // Internal methods.
+
+  /**
+   * Returns the buffer flags for a sample, based on the coding type of the first VOP found in the
+   * unread part of {@code data}.
+   *
+   * <p>The read position of {@code data} is left unchanged.
+   *
+   * @param data The RTP payload to inspect.
+   * @return {@link C#BUFFER_FLAG_KEY_FRAME} if an I-VOP is found, {@code 0} otherwise.
+   */
+  @C.BufferFlags
+  private static int getBufferFlagsFromVop(ParsableByteArray data) {
+    // Search for VOP_START_CODE (00 00 01 B6) followed by at least one more byte.
+    byte[] inputData = data.getData();
+    int searchEnd = data.getPosition() + data.bytesLeft();
+    for (int i = data.getPosition(); i + 4 < searchEnd; i++) {
+      if (inputData[i] == 0
+          && inputData[i + 1] == 0
+          && inputData[i + 2] == 1
+          && (inputData[i + 3] & 0xFF) == 0xB6) {
+        // The VOP coding type is the two most significant bits of the byte after the start code.
+        int vopType = (inputData[i + 4] & 0xFF) >> 6;
+        return vopType == I_VOP ? C.BUFFER_FLAG_KEY_FRAME : 0;
+      }
+    }
+    return 0;
+  }
+
+  /**
+   * Returns the sample time in microseconds: {@code startTimeOffsetUs} plus the RTP time elapsed
+   * since {@code firstReceivedRtpTimestamp}, converted from the 90 kHz media clock.
+   */
+  private static long toSampleUs(
+      long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) {
+    return startTimeOffsetUs
+        + Util.scaleLargeTimestamp(
+            (rtpTimestamp - firstReceivedRtpTimestamp),
+            /* multiplier= */ C.MICROS_PER_SECOND,
+            /* divisor= */ MEDIA_CLOCK_FREQUENCY);
+  }
+}