Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rtp mpeg4 #35

Merged
merged 6 commits into from Apr 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -15,6 +15,8 @@
*/
package androidx.media3.common.util;

import static androidx.media3.common.util.Assertions.checkArgument;

import android.util.Pair;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
Expand All @@ -31,6 +33,12 @@ public final class CodecSpecificDataUtil {
private static final String[] HEVC_GENERAL_PROFILE_SPACE_STRINGS =
new String[] {"", "A", "B", "C"};

// MP4V-ES
private static final int VISUAL_OBJECT_LAYER = 1;
private static final int VISUAL_OBJECT_LAYER_START = 0x20;
private static final int EXTENDED_PAR = 0x0F;
private static final int RECTANGULAR = 0x00;

/**
* Parses an ALAC AudioSpecificConfig (i.e. an <a
* href="https://github.com/macosforge/alac/blob/master/ALACMagicCookieDescription.txt">ALACSpecificConfig</a>).
Expand Down Expand Up @@ -72,6 +80,86 @@ public static boolean parseCea708InitializationData(List<byte[]> initializationD
&& initializationData.get(0)[0] == 1;
}

/**
* Parses an MPEG-4 Visual configuration information, as defined in ISO/IEC14496-2
*
* @param videoSpecificConfig A byte array containing the MPEG-4 Visual configuration information
* to parse.
* @return A pair consisting of the width and the height.
*/
public static Pair<Integer, Integer> getVideoResolutionFromMpeg4VideoConfig(
byte[] videoSpecificConfig) {
int offset = 0;
boolean foundVOL = false;
ParsableByteArray scratchBytes = new ParsableByteArray(videoSpecificConfig);
while (offset + 3 < videoSpecificConfig.length) {
if (scratchBytes.readUnsignedInt24() != VISUAL_OBJECT_LAYER
|| (videoSpecificConfig[offset + 3] & 0xf0) != VISUAL_OBJECT_LAYER_START) {
scratchBytes.setPosition(scratchBytes.getPosition() - 2);
offset++;
continue;
}
foundVOL = true;
break;
}

checkArgument(foundVOL, "Invalid input: VOL not found.");

ParsableBitArray scratchBits = new ParsableBitArray(videoSpecificConfig);
// Skip the start codecs from the bitstream
scratchBits.skipBits((offset + 4) * 8);
scratchBits.skipBits(1); // random_accessible_vol
scratchBits.skipBits(8); // video_object_type_indication

if (scratchBits.readBit()) { // object_layer_identifier
scratchBits.skipBits(4); // video_object_layer_verid
scratchBits.skipBits(3); // video_object_layer_priority
}

int aspectRatioInfo = scratchBits.readBits(4);
if (aspectRatioInfo == EXTENDED_PAR) {
scratchBits.skipBits(8); // par_width
scratchBits.skipBits(8); // par_height
}

if (scratchBits.readBit()) { // vol_control_parameters
scratchBits.skipBits(2); // chroma_format
scratchBits.skipBits(1); // low_delay
if (scratchBits.readBit()) { // vbv_parameters
scratchBits.skipBits(79);
}
}

int videoObjectLayerShape = scratchBits.readBits(2);
checkArgument(
videoObjectLayerShape == RECTANGULAR, "Only supports rectangular video object layer shape");

checkArgument(scratchBits.readBit()); // marker_bit
int vopTimeIncrementResolution = scratchBits.readBits(16);
checkArgument(scratchBits.readBit()); // marker_bit

if (scratchBits.readBit()) { // fixed_vop_rate
checkArgument(vopTimeIncrementResolution > 0);
vopTimeIncrementResolution--;
int numBitsToSkip = 0;
while (vopTimeIncrementResolution > 0) {
numBitsToSkip++;
vopTimeIncrementResolution >>= 1;
}
scratchBits.skipBits(numBitsToSkip); // fixed_vop_time_increment
}

checkArgument(scratchBits.readBit()); // marker_bit
int videoObjectLayerWidth = scratchBits.readBits(13);
checkArgument(scratchBits.readBit()); // marker_bit
int videoObjectLayerHeight = scratchBits.readBits(13);
checkArgument(scratchBits.readBit()); // marker_bit

scratchBits.skipBits(1); // interlaced

return Pair.create(videoObjectLayerWidth, videoObjectLayerHeight);
}

/**
* Builds an RFC 6381 AVC codec string using the provided parameters.
*
Expand Down
Expand Up @@ -38,6 +38,7 @@ public final class RtpPayloadFormat {

private static final String RTP_MEDIA_AC3 = "AC3";
private static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
private static final String RTP_MEDIA_MPEG4_VIDEO = "MP4V-ES";
private static final String RTP_MEDIA_H264 = "H264";
private static final String RTP_MEDIA_H265 = "H265";

Expand All @@ -47,6 +48,7 @@ public static boolean isFormatSupported(MediaDescription mediaDescription) {
case RTP_MEDIA_AC3:
case RTP_MEDIA_H264:
case RTP_MEDIA_H265:
case RTP_MEDIA_MPEG4_VIDEO:
case RTP_MEDIA_MPEG4_GENERIC:
return true;
default:
Expand All @@ -69,6 +71,8 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
return MimeTypes.VIDEO_H264;
case RTP_MEDIA_H265:
return MimeTypes.VIDEO_H265;
case RTP_MEDIA_MPEG4_VIDEO:
return MimeTypes.VIDEO_MP4V;
case RTP_MEDIA_MPEG4_GENERIC:
return MimeTypes.AUDIO_AAC;
default:
Expand Down
Expand Up @@ -25,6 +25,7 @@

import android.net.Uri;
import android.util.Base64;
import android.util.Pair;
import androidx.annotation.Nullable;
import androidx.annotation.VisibleForTesting;
import androidx.media3.common.C;
Expand All @@ -48,11 +49,14 @@
private static final String PARAMETER_H265_SPROP_PPS = "sprop-pps";
private static final String PARAMETER_H265_SPROP_VPS = "sprop-vps";
private static final String PARAMETER_H265_SPROP_MAX_DON_DIFF = "sprop-max-don-diff";
private static final String PARAMETER_MP4V_CONFIG = "config";

/** Prefix for the RFC6381 codecs string for AAC formats. */
private static final String AAC_CODECS_PREFIX = "mp4a.40.";
/** Prefix for the RFC6381 codecs string for AVC formats. */
private static final String H264_CODECS_PREFIX = "avc1.";
/** Prefix for the RFC6416 codecs string for MPEG4V-ES formats. */
private static final String MPEG4_CODECS_PREFIX = "mp4v.";

private static final String GENERIC_CONTROL_ATTR = "*";

Expand Down Expand Up @@ -121,6 +125,10 @@ public int hashCode() {
checkArgument(!fmtpParameters.isEmpty());
processAacFmtpAttribute(formatBuilder, fmtpParameters, channelCount, clockRate);
break;
case MimeTypes.VIDEO_MP4V:
checkArgument(!fmtpParameters.isEmpty());
processMPEG4FmtpAttribute(formatBuilder, fmtpParameters);
break;
case MimeTypes.VIDEO_H264:
checkArgument(!fmtpParameters.isEmpty());
processH264FmtpAttribute(formatBuilder, fmtpParameters);
Expand Down Expand Up @@ -169,6 +177,23 @@ private static void processAacFmtpAttribute(
AacUtil.buildAacLcAudioSpecificConfig(sampleRate, channelCount)));
}

private static void processMPEG4FmtpAttribute(
Format.Builder formatBuilder, ImmutableMap<String, String> fmtpAttributes) {
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4V_CONFIG);
if (configInput != null) {
byte[] configBuffer = Util.getBytesFromHexString(configInput);
formatBuilder.setInitializationData(ImmutableList.of(configBuffer));
Pair<Integer, Integer> resolution =
CodecSpecificDataUtil.getVideoResolutionFromMpeg4VideoConfig(configBuffer);
formatBuilder.setWidth(resolution.first).setHeight(resolution.second);
} else {
// set the default width and height
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the default width and height 352x288? Is it in an RFC? If so please add a link

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default resolution is not mentioned in RFC.
For non-standard fmtp where configinput is missing, we use 352X288 as default since CIF is the default resolution used by Android Software decoder. Adding the link for your reference
https://cs.android.com/android/platform/superproject/+/master:frameworks/av/media/codec2/components/mpeg4_h263/C2SoftMpeg4Dec.cpp;l=130

formatBuilder.setWidth(352).setHeight(288);
}
@Nullable String profileLevel = fmtpAttributes.get(PARAMETER_PROFILE_LEVEL_ID);
formatBuilder.setCodecs(MPEG4_CODECS_PREFIX + (profileLevel == null ? "1" : profileLevel));
}

/** Returns H264/H265 initialization data from the RTP parameter set. */
private static byte[] getInitializationDataFromParameterSet(String parameterSet) {
byte[] decodedParameterNalData = Base64.decode(parameterSet, Base64.DEFAULT);
Expand Down
Expand Up @@ -40,6 +40,8 @@ public RtpPayloadReader createPayloadReader(RtpPayloadFormat payloadFormat) {
return new RtpH264Reader(payloadFormat);
case MimeTypes.VIDEO_H265:
return new RtpH265Reader(payloadFormat);
case MimeTypes.VIDEO_MP4V:
return new RtpMPEG4Reader(payloadFormat);
default:
// No supported reader, returning null.
}
Expand Down
@@ -0,0 +1,147 @@
/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.rtsp.reader;

import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.castNonNull;

import androidx.media3.common.C;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util;
import androidx.media3.exoplayer.rtsp.RtpPacket;
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
import androidx.media3.extractor.ExtractorOutput;
import androidx.media3.extractor.TrackOutput;
import com.google.common.primitives.Bytes;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/**
* Parses an MPEG4 byte stream carried on RTP packets, and extracts MPEG4 Access Units. Refer to
* RFC6416 for more details.
*/
/* package */ final class RtpMPEG4Reader implements RtpPayloadReader {
private static final String TAG = "RtpMPEG4Reader";

private static final long MEDIA_CLOCK_FREQUENCY = 90_000;

/** VOP unit type. */
private static final int I_VOP = 0;

private final RtpPayloadFormat payloadFormat;
private @MonotonicNonNull TrackOutput trackOutput;
@C.BufferFlags private int bufferFlags;
private long firstReceivedTimestamp;
private int previousSequenceNumber;
private long startTimeOffsetUs;
private int sampleLength;

/** Creates an instance. */
public RtpMPEG4Reader(RtpPayloadFormat payloadFormat) {
this.payloadFormat = payloadFormat;
firstReceivedTimestamp = C.TIME_UNSET;
previousSequenceNumber = C.INDEX_UNSET;
sampleLength = 0;
}

@Override
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
castNonNull(trackOutput).format(payloadFormat.format);
}

@Override
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {}

@Override
public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
throws ParserException {
checkStateNotNull(trackOutput);
// Check that this packet is in the sequence of the previous packet.
if (previousSequenceNumber != C.INDEX_UNSET) {
int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
if (sequenceNumber != expectedSequenceNumber) {
Log.w(
TAG,
Util.formatInvariant(
"Received RTP packet with unexpected sequence number. Expected: %d; received: %d."
+ " Dropping packet.",
expectedSequenceNumber, sequenceNumber));
return;
}
}

// Parse VOP Type and get the buffer flags
int limit = data.bytesLeft();
trackOutput.sampleData(data, limit);
if (sampleLength == 0) {
bufferFlags = getBufferFlagsFromVop(data);
}
sampleLength += limit;

// Marker (M) bit: The marker bit is set to 1 to indicate the last RTP
// packet(or only RTP packet) of a VOP. When multiple VOPs are carried
// in the same RTP packet, the marker bit is set to 1.
if (rtpMarker) {
if (firstReceivedTimestamp == C.TIME_UNSET) {
firstReceivedTimestamp = timestamp;
}

long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp);
trackOutput.sampleMetadata(timeUs, bufferFlags, sampleLength, 0, null);
sampleLength = 0;
}
previousSequenceNumber = sequenceNumber;
}

@Override
public void seek(long nextRtpTimestamp, long timeUs) {
firstReceivedTimestamp = nextRtpTimestamp;
startTimeOffsetUs = timeUs;
sampleLength = 0;
}

// Internal methods.

/**
* Parses VOP Coding type.
*
* Sets {@link #bufferFlags} according to the VOP Coding type.
*/
@C.BufferFlags
private static int getBufferFlagsFromVop(ParsableByteArray data) {
// search for VOP_START_CODE (00 00 01 B6)
byte[] inputData = data.getData();
byte[] startCode = new byte[] {0x0, 0x0, 0x1, (byte) 0xB6};
int vopStartCodePos = Bytes.indexOf(inputData, startCode);
if (vopStartCodePos != -1) {
data.setPosition(vopStartCodePos + 4);
int vopType = data.peekUnsignedByte() >> 6;
return (vopType == I_VOP ? C.BUFFER_FLAG_KEY_FRAME : 0);
}
return 0;
}

private static long toSampleUs(
long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) {
return startTimeOffsetUs
+ Util.scaleLargeTimestamp(
(rtpTimestamp - firstReceivedRtpTimestamp),
/* multiplier= */ C.MICROS_PER_SECOND,
/* divisor= */ MEDIA_CLOCK_FREQUENCY);
}
}