From 9189e56a9550e981be3d0cb8a14592e24a4fda7f Mon Sep 17 00:00:00 2001 From: James Newton-King Date: Wed, 16 Dec 2020 13:34:16 +1300 Subject: [PATCH] Improve WriteString perf with SIMD --- .../Buffers/ArrayBufferWriter.cs | 12 +- .../CodedOutputStreamTest.cs | 20 ++-- .../Google.Protobuf.Test.csproj | 2 +- .../Google.Protobuf.Test/JsonParserTest.cs | 2 + .../Google.Protobuf.Test/JsonTokenizerTest.cs | 2 + .../LegacyGeneratedCodeTest.cs | 2 +- .../MessageParsingHelpers.cs | 6 +- .../Google.Protobuf/Google.Protobuf.csproj | 16 ++- .../src/Google.Protobuf/WritingPrimitives.cs | 109 ++++++++++++++++-- 9 files changed, 137 insertions(+), 34 deletions(-) diff --git a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs index 5b9913b29035..a36a9513c72a 100644 --- a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs +++ b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs @@ -42,7 +42,7 @@ namespace Google.Protobuf.Buffers /// ArrayBufferWriter is originally from corefx, and has been contributed to Protobuf /// https://github.com/dotnet/runtime/blob/071da4c41aa808c949a773b92dca6f88de9d11f3/src/libraries/Common/src/System/Buffers/ArrayBufferWriter.cs /// - internal sealed class ArrayBufferWriter : IBufferWriter + internal sealed class TestArrayBufferWriter : IBufferWriter { private T[] _buffer; private int _index; @@ -50,10 +50,10 @@ internal sealed class ArrayBufferWriter : IBufferWriter private const int DefaultInitialBufferSize = 256; /// - /// Creates an instance of an , in which data can be written to, + /// Creates an instance of an , in which data can be written to, /// with the default initial capacity. /// - public ArrayBufferWriter() + public TestArrayBufferWriter() { _buffer = new T[0]; _index = 0; @@ -66,14 +66,14 @@ public ArrayBufferWriter() public int? MaxGrowBy { get; set; } /// - /// Creates an instance of an , in which data can be written to, + /// Creates an instance of an , in which data can be written to, /// with an initial capacity specified. /// /// The minimum capacity with which to initialize the underlying buffer. /// /// Thrown when is not positive (i.e. less than or equal to 0). /// - public ArrayBufferWriter(int initialCapacity) + public TestArrayBufferWriter(int initialCapacity) { if (initialCapacity <= 0) throw new ArgumentException(nameof(initialCapacity)); @@ -111,7 +111,7 @@ public ArrayBufferWriter(int initialCapacity) /// Clears the data written to the underlying buffer. /// /// - /// You must clear the before trying to re-use it. + /// You must clear the before trying to re-use it. /// public void Clear() { diff --git a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs index 1c77e121d3eb..545d0468b894 100644 --- a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs @@ -57,7 +57,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); ctx.Flush(); @@ -76,7 +76,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); ctx.Flush(); @@ -99,7 +99,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); @@ -114,7 +114,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); @@ -173,7 +173,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); ctx.Flush(); @@ -189,7 +189,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); @@ -211,7 +211,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed64(value); ctx.Flush(); @@ -227,7 +227,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = blockSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed64(value); @@ -269,7 +269,7 @@ public void WriteWholeMessage_VaryingBlockSizes() output.Flush(); Assert.AreEqual(rawBytes, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = blockSize; message.WriteTo(bufferWriter); Assert.AreEqual(rawBytes, bufferWriter.WrittenSpan.ToArray()); @@ -291,7 +291,7 @@ public void WriteContext_WritesWithFlushes() output.Flush(); byte[] expectedBytes2 = expectedOutput.ToArray(); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteMessage(message); ctx.Flush(); diff --git a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj index 7bd3f84e459e..cdfa98e09874 100644 --- a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj +++ b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj @@ -1,7 +1,7 @@  - net451;netcoreapp2.1 + net451;netcoreapp2.1;net50 ../../keys/Google.Protobuf.snk true False diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs index e170fcc5a09c..87a389aecbb5 100644 --- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs @@ -551,9 +551,11 @@ public void NumberToDouble_Valid(string jsonValue, double expectedParsedValue) } [Test] +#if !NET5_0 [TestCase("1.7977e308")] [TestCase("-1.7977e308")] [TestCase("1e309")] +#endif [TestCase("1,0")] [TestCase("1.0.0")] [TestCase("+1")] diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs index df43effd4ff0..55ec02ea023f 100644 --- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs @@ -199,8 +199,10 @@ public void NumberValue(string json, double expectedValue) [TestCase("1e-")] [TestCase("--")] [TestCase("--1")] +#if !NET5_0 [TestCase("-1.7977e308")] [TestCase("1.7977e308")] +#endif public void InvalidNumberValue(string json) { AssertThrowsAfter(json); diff --git a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs index da7b4a8c077a..22adcaa95d93 100644 --- a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs +++ b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs @@ -141,7 +141,7 @@ public void LegacyGeneratedCodeThrowsWithIBufferWriter() }; var exception = Assert.Throws(() => { - WriteContext.Initialize(new ArrayBufferWriter(), out WriteContext writeCtx); + WriteContext.Initialize(new TestArrayBufferWriter(), out WriteContext writeCtx); ((IBufferMessage)message).InternalWriteTo(ref writeCtx); }); Assert.AreEqual($"Message {typeof(LegacyGeneratedCodeMessageA).Name} doesn't provide the generated method that enables WriteContext-based serialization. You might need to regenerate the generated protobuf code.", exception.Message); diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs index 36a2f0222946..65d2fe03954f 100644 --- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs +++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs @@ -83,7 +83,7 @@ public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Acti var bytes = message.ToByteArray(); // also serialize using IBufferWriter and check it leads to the same data - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data."); @@ -112,7 +112,7 @@ public static void AssertWritingMessage(IMessage message) Assert.AreEqual(message.CalculateSize(), bytes.Length); // serialize using IBufferWriter and check it leads to the same output - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray()); @@ -124,7 +124,7 @@ public static void AssertWritingMessage(IMessage message) // test for different IBufferWriter.GetSpan() segment sizes for (int blockSize = 1; blockSize < 256; blockSize *= 2) { - var segmentedBufferWriter = new ArrayBufferWriter(); + var segmentedBufferWriter = new TestArrayBufferWriter(); segmentedBufferWriter.MaxGrowBy = blockSize; message.WriteTo(segmentedBufferWriter); Assert.AreEqual(bytes, segmentedBufferWriter.WrittenSpan.ToArray()); diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj index f16063418b8e..4dbb349307a9 100644 --- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj +++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj @@ -1,4 +1,4 @@ - + C# runtime library for Protocol Buffers - Google's data interchange format. @@ -8,7 +8,7 @@ 7.2 Google Inc. - netstandard1.1;netstandard2.0;net45 + netstandard1.1;netstandard2.0;net45;net50 true ../../keys/Google.Protobuf.snk true @@ -27,15 +27,23 @@ $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING + + $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING;GOOGLE_PROTOBUF_SIMD + + - - + + + + + + diff --git a/csharp/src/Google.Protobuf/WritingPrimitives.cs b/csharp/src/Google.Protobuf/WritingPrimitives.cs index 846df7350260..823cdb7f49db 100644 --- a/csharp/src/Google.Protobuf/WritingPrimitives.cs +++ b/csharp/src/Google.Protobuf/WritingPrimitives.cs @@ -34,6 +34,11 @@ using System.Buffers.Binary; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if NET5_0 +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +#endif using System.Security; using System.Text; @@ -45,8 +50,11 @@ namespace Google.Protobuf [SecuritySafeCritical] internal static class WritingPrimitives { - // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) - internal static readonly Encoding Utf8Encoding = Encoding.UTF8; +#if NET5_0 + internal static Encoding Utf8Encoding => Encoding.UTF8; // allows JIT to devirtualize +#else + internal static readonly Encoding Utf8Encoding = Encoding.UTF8; // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) +#endif #region Writing of values (not including tags) @@ -171,9 +179,37 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st { if (length == value.Length) // Must be all ASCII... { - for (int i = 0; i < length; i++) + // If 64bit, and value has at least 4 chars, process 4 chars at a time. + if (IntPtr.Size == 8 && value.Length >= 4) + { + ref byte sourceBytes = ref Unsafe.As(ref MemoryMarshal.GetReference(value.AsSpan())); + ref byte destinationBytes = ref MemoryMarshal.GetReference(buffer.Slice(state.position)); + + // Process 4 chars at a time until there are less than 4 remaining. + // We already know all characters are ASCII so there is no need to validate the source. + int lastIndexWhereCanReadFourChars = value.Length - 4; + int currentIndex = 0; + do + { + NarrowFourUtf16CharsToAsciiAndWriteToBuffer( + ref Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex), + Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref sourceBytes, (IntPtr)(currentIndex * 2)))); + + } while ((currentIndex += 4) <= lastIndexWhereCanReadFourChars); + + // Process 3 chars or less remainder. We already have refs to source and destination + // so use them to perform array operations without bounds checks. + for (; currentIndex < length; currentIndex++) + { + Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex) = Unsafe.AddByteOffset(ref sourceBytes, (IntPtr)(currentIndex * 2)); + } + } + else { - buffer[state.position + i] = (byte)value[i]; + for (int i = 0; i < length; i++) + { + buffer[state.position + i] = (byte)value[i]; + } } state.position += length; } @@ -209,6 +245,61 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st } } + // Copied with permission from https://github.com/dotnet/runtime/blob/1cdafd27e4afd2c916af5df949c13f8b373c4335/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs#L1119-L1171 + // + /// + /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order, + /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer + /// also in machine-endian order. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value) + { +#if GOOGLE_PROTOBUF_SIMD + if (Sse2.X64.IsSupported) + { + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination. + + Vector128 vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16(); + Vector128 vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32(); + Unsafe.WriteUnaligned(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow)); + } + else if (AdvSimd.IsSupported) + { + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination. + + Vector128 vecWide = Vector128.CreateScalarUnsafe(value).AsInt16(); + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide); + Unsafe.WriteUnaligned(ref outputBuffer, lower.AsUInt32().ToScalar()); + } + else +#endif + { + if (BitConverter.IsLittleEndian) + { + outputBuffer = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + } + else + { + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + outputBuffer = (byte)value; + } + } + } + /// /// Write a byte string, without a tag, to the stream. /// The data is length-prefixed. @@ -278,9 +369,9 @@ public static void WriteLength(ref Span buffer, ref WriterInternalState st WriteRawVarint32(ref buffer, ref state, (uint)length); } - #endregion +#endregion - #region Writing primitives +#region Writing primitives /// /// Writes a 32 bit value as a varint. The fast route is taken when /// there's enough buffer space left to whizz through without checking @@ -463,9 +554,9 @@ public static void WriteRawBytes(ref Span buffer, ref WriterInternalState state.position += remainderLength; } } - #endregion +#endregion - #region Raw tag writing +#region Raw tag writing /// /// Encodes and writes a tag. /// @@ -594,7 +685,7 @@ private static void WriteRawTagSlowPath(ref Span buffer, ref WriterInterna WriteRawByte(ref buffer, ref state, b4); WriteRawByte(ref buffer, ref state, b5); } - #endregion +#endregion /// /// Encode a 32-bit value with ZigZag encoding.