From fd117d005514c0cabd75cceddd227ba8fa3d721d Mon Sep 17 00:00:00 2001 From: James Newton-King Date: Wed, 16 Dec 2020 12:56:14 +1300 Subject: [PATCH] Improve WriteString perf with SIMD --- .../Google.Protobuf.Benchmarks.csproj | 2 +- .../Buffers/ArrayBufferWriter.cs | 12 +- .../CodedOutputStreamTest.cs | 47 ++++++-- .../Google.Protobuf.Test.csproj | 2 +- .../Google.Protobuf.Test/JsonParserTest.cs | 2 + .../Google.Protobuf.Test/JsonTokenizerTest.cs | 2 + .../LegacyGeneratedCodeTest.cs | 2 +- .../MessageParsingHelpers.cs | 6 +- .../Google.Protobuf/Google.Protobuf.csproj | 16 ++- .../src/Google.Protobuf/WritingPrimitives.cs | 111 ++++++++++++++++-- global.json | 2 +- .../linux/dockerfile/test/csharp/Dockerfile | 2 +- 12 files changed, 168 insertions(+), 38 deletions(-) diff --git a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj index 73042f358c47..ac8e009adc3d 100644 --- a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj +++ b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - netcoreapp3.1 + net5.0 ../../keys/Google.Protobuf.snk true False diff --git a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs index 5b9913b29035..a36a9513c72a 100644 --- a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs +++ b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs @@ -42,7 +42,7 @@ namespace Google.Protobuf.Buffers /// ArrayBufferWriter is originally from corefx, and has been contributed to Protobuf /// https://github.com/dotnet/runtime/blob/071da4c41aa808c949a773b92dca6f88de9d11f3/src/libraries/Common/src/System/Buffers/ArrayBufferWriter.cs /// - internal sealed class ArrayBufferWriter : IBufferWriter + internal sealed class TestArrayBufferWriter : IBufferWriter { private T[] _buffer; private int _index; @@ -50,10 +50,10 @@ 
internal sealed class ArrayBufferWriter : IBufferWriter private const int DefaultInitialBufferSize = 256; /// - /// Creates an instance of an <see cref="ArrayBufferWriter{T}"/>, in which data can be written to, + /// Creates an instance of an <see cref="TestArrayBufferWriter{T}"/>, in which data can be written to, /// with the default initial capacity. /// - public ArrayBufferWriter() + public TestArrayBufferWriter() { _buffer = new T[0]; _index = 0; @@ -66,14 +66,14 @@ public ArrayBufferWriter() public int? MaxGrowBy { get; set; } /// - /// Creates an instance of an <see cref="ArrayBufferWriter{T}"/>, in which data can be written to, + /// Creates an instance of an <see cref="TestArrayBufferWriter{T}"/>, in which data can be written to, /// with an initial capacity specified. /// /// The minimum capacity with which to initialize the underlying buffer. /// /// Thrown when <paramref name="initialCapacity"/> is not positive (i.e. less than or equal to 0). /// - public ArrayBufferWriter(int initialCapacity) + public TestArrayBufferWriter(int initialCapacity) { if (initialCapacity <= 0) throw new ArgumentException(nameof(initialCapacity)); @@ -111,7 +111,7 @@ public ArrayBufferWriter(int initialCapacity) /// Clears the data written to the underlying buffer. /// /// - /// You must clear the <see cref="ArrayBufferWriter{T}"/> before trying to re-use it. + /// You must clear the <see cref="TestArrayBufferWriter{T}"/> before trying to re-use it. 
/// public void Clear() { diff --git a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs index 1e5333c96501..d77be44b822a 100644 --- a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs @@ -58,7 +58,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); ctx.Flush(); @@ -77,7 +77,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); ctx.Flush(); @@ -100,7 +100,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); @@ -115,7 +115,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); @@ -174,7 +174,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); 
WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); ctx.Flush(); @@ -190,7 +190,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); @@ -212,7 +212,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed64(value); ctx.Flush(); @@ -228,7 +228,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = blockSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed64(value); @@ -270,7 +270,7 @@ public void WriteWholeMessage_VaryingBlockSizes() output.Flush(); Assert.AreEqual(rawBytes, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = blockSize; message.WriteTo(bufferWriter); Assert.AreEqual(rawBytes, bufferWriter.WrittenSpan.ToArray()); @@ -292,7 +292,7 @@ public void WriteContext_WritesWithFlushes() output.Flush(); byte[] expectedBytes2 = expectedOutput.ToArray(); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteMessage(message); ctx.Flush(); @@ -519,7 +519,7 @@ public void Dispose_FromByteArray() } [Test] - public void WriteStringsOfDifferentSizes() + public void 
WriteStringsOfDifferentSizes_Ascii() { for (int i = 1; i <= 1024; i++) { @@ -540,5 +540,30 @@ public void WriteStringsOfDifferentSizes() Assert.AreEqual(s, input.ReadString()); } } + + [Test] + public void WriteStringsOfDifferentSizes_Unicode() + { + for (int i = 1; i <= 1024; i++) + { + var buffer = new byte[4096]; + var output = new CodedOutputStream(buffer); + var sb = new StringBuilder(); + for (int j = 0; j < i; j++) + { + char c = (char)((j % 10) + 10112); + sb.Append(c.ToString()); // incrementing unicode numbers, repeating + } + var s = sb.ToString(); + output.WriteString(s); + + output.Flush(); + + // Verify written content + var input = new CodedInputStream(buffer); + + Assert.AreEqual(s, input.ReadString()); + } + } } } \ No newline at end of file diff --git a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj index 7bd3f84e459e..cdfa98e09874 100644 --- a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj +++ b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj @@ -1,7 +1,7 @@  - net451;netcoreapp2.1 + net451;netcoreapp2.1;net50 ../../keys/Google.Protobuf.snk true False diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs index e170fcc5a09c..87a389aecbb5 100644 --- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs @@ -551,9 +551,11 @@ public void NumberToDouble_Valid(string jsonValue, double expectedParsedValue) } [Test] +#if !NET5_0 [TestCase("1.7977e308")] [TestCase("-1.7977e308")] [TestCase("1e309")] +#endif [TestCase("1,0")] [TestCase("1.0.0")] [TestCase("+1")] diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs index df43effd4ff0..55ec02ea023f 100644 --- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs +++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs @@ -199,8 
+199,10 @@ public void NumberValue(string json, double expectedValue) [TestCase("1e-")] [TestCase("--")] [TestCase("--1")] +#if !NET5_0 [TestCase("-1.7977e308")] [TestCase("1.7977e308")] +#endif public void InvalidNumberValue(string json) { AssertThrowsAfter(json); diff --git a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs index da7b4a8c077a..22adcaa95d93 100644 --- a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs +++ b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs @@ -141,7 +141,7 @@ public void LegacyGeneratedCodeThrowsWithIBufferWriter() }; var exception = Assert.Throws(() => { - WriteContext.Initialize(new ArrayBufferWriter(), out WriteContext writeCtx); + WriteContext.Initialize(new TestArrayBufferWriter(), out WriteContext writeCtx); ((IBufferMessage)message).InternalWriteTo(ref writeCtx); }); Assert.AreEqual($"Message {typeof(LegacyGeneratedCodeMessageA).Name} doesn't provide the generated method that enables WriteContext-based serialization. 
You might need to regenerate the generated protobuf code.", exception.Message); diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs index 36a2f0222946..65d2fe03954f 100644 --- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs +++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs @@ -83,7 +83,7 @@ public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Acti var bytes = message.ToByteArray(); // also serialize using IBufferWriter and check it leads to the same data - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data."); @@ -112,7 +112,7 @@ public static void AssertWritingMessage(IMessage message) Assert.AreEqual(message.CalculateSize(), bytes.Length); // serialize using IBufferWriter and check it leads to the same output - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray()); @@ -124,7 +124,7 @@ public static void AssertWritingMessage(IMessage message) // test for different IBufferWriter.GetSpan() segment sizes for (int blockSize = 1; blockSize < 256; blockSize *= 2) { - var segmentedBufferWriter = new ArrayBufferWriter(); + var segmentedBufferWriter = new TestArrayBufferWriter(); segmentedBufferWriter.MaxGrowBy = blockSize; message.WriteTo(segmentedBufferWriter); Assert.AreEqual(bytes, segmentedBufferWriter.WrittenSpan.ToArray()); diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj index f16063418b8e..4dbb349307a9 100644 --- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj +++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj @@ -1,4 +1,4 @@ - + C# runtime library 
for Protocol Buffers - Google's data interchange format. @@ -8,7 +8,7 @@ 7.2 Google Inc. - netstandard1.1;netstandard2.0;net45 + netstandard1.1;netstandard2.0;net45;net50 true ../../keys/Google.Protobuf.snk true @@ -27,15 +27,23 @@ $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING + + $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING;GOOGLE_PROTOBUF_SIMD + + - - + + + + + + diff --git a/csharp/src/Google.Protobuf/WritingPrimitives.cs b/csharp/src/Google.Protobuf/WritingPrimitives.cs index cf8fc7ba71fd..b6ed90cdf5f6 100644 --- a/csharp/src/Google.Protobuf/WritingPrimitives.cs +++ b/csharp/src/Google.Protobuf/WritingPrimitives.cs @@ -34,6 +34,11 @@ using System.Buffers.Binary; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if NET5_0 +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +#endif using System.Security; using System.Text; @@ -45,8 +50,11 @@ namespace Google.Protobuf [SecuritySafeCritical] internal static class WritingPrimitives { - // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) - internal static readonly Encoding Utf8Encoding = Encoding.UTF8; +#if NET5_0 + internal static Encoding Utf8Encoding => Encoding.UTF8; // allows JIT to devirtualize +#else + internal static readonly Encoding Utf8Encoding = Encoding.UTF8; // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) +#endif #region Writing of values (not including tags) @@ -186,10 +194,21 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st { if (length == value.Length) // Must be all ASCII... { - for (int i = 0; i < length; i++) + ref char sourceChars = ref MemoryMarshal.GetReference(value.AsSpan()); + ref byte destinationBytes = ref MemoryMarshal.GetReference(buffer.Slice(state.position)); + + // If 64bit and at least 4 chars are present, process 4 chars at a time. The 4-char helper always handles one full group (do/while), so strings shorter than 4 must skip it to avoid reading/writing out of bounds. + int currentIndex = IntPtr.Size == 8 && length >= 4 + ? 
WriteAsciiStringToBuffer(ref sourceChars, ref destinationBytes, value, length) + : 0; + + // Process any remaining, 1 char at a time. + // Avoid bounds checking with ref + Unsafe + for (; currentIndex < length; currentIndex++) { - buffer[state.position + i] = (byte)value[i]; + Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex) = (byte)Unsafe.AddByteOffset(ref sourceChars, (IntPtr)(currentIndex * 2)); } + state.position += length; } else @@ -208,6 +227,80 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st } } + private static int WriteAsciiStringToBuffer(ref char sourceChars, ref byte destinationBytes, string value, int length) + { + ref byte sourceBytes = ref Unsafe.As(ref sourceChars); + + // Process 4 chars at a time until there are less than 4 remaining. + // We already know all characters are ASCII so there is no need to validate the source. + int lastIndexWhereCanReadFourChars = value.Length - 4; + int currentIndex = 0; + do + { + NarrowFourUtf16CharsToAsciiAndWriteToBuffer( + ref Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex), + Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref sourceBytes, (IntPtr)(currentIndex * 2)))); + + } while ((currentIndex += 4) <= lastIndexWhereCanReadFourChars); + + return currentIndex; + } + + // Copied with permission from https://github.com/dotnet/runtime/blob/1cdafd27e4afd2c916af5df949c13f8b373c4335/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs#L1119-L1171 + // + /// + /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order, + /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer + /// also in machine-endian order. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value) + { +#if GOOGLE_PROTOBUF_SIMD + if (Sse2.X64.IsSupported) + { + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination. + + Vector128 vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16(); + Vector128 vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32(); + Unsafe.WriteUnaligned(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow)); + } + else if (AdvSimd.IsSupported) + { + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination. + + Vector128 vecWide = Vector128.CreateScalarUnsafe(value).AsInt16(); + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide); + Unsafe.WriteUnaligned(ref outputBuffer, lower.AsUInt32().ToScalar()); + } + else +#endif + { + if (BitConverter.IsLittleEndian) + { + outputBuffer = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + } + else + { + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + outputBuffer = (byte)value; + } + } + } + private static int WriteStringToBuffer(Span buffer, ref WriterInternalState state, string value) { #if NETSTANDARD1_1 @@ -304,9 +397,9 @@ public static void WriteLength(ref Span buffer, ref WriterInternalState st WriteRawVarint32(ref buffer, ref state, (uint)length); } - #endregion +#endregion - #region Writing primitives +#region Writing primitives /// /// Writes a 32 bit value as a varint. 
The fast route is taken when /// there's enough buffer space left to whizz through without checking @@ -489,9 +582,9 @@ public static void WriteRawBytes(ref Span buffer, ref WriterInternalState state.position += remainderLength; } } - #endregion +#endregion - #region Raw tag writing +#region Raw tag writing /// /// Encodes and writes a tag. /// @@ -620,7 +713,7 @@ private static void WriteRawTagSlowPath(ref Span buffer, ref WriterInterna WriteRawByte(ref buffer, ref state, b4); WriteRawByte(ref buffer, ref state, b5); } - #endregion +#endregion /// /// Encode a 32-bit value with ZigZag encoding. diff --git a/global.json b/global.json index 16f71a741fa7..d29e29a3ea9d 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,6 @@ { "sdk": { - "version": "3.0.100", + "version": "5.0.102", "rollForward": "latestMinor" } } diff --git a/kokoro/linux/dockerfile/test/csharp/Dockerfile b/kokoro/linux/dockerfile/test/csharp/Dockerfile index 95bd653152f0..0bbadba40ae4 100644 --- a/kokoro/linux/dockerfile/test/csharp/Dockerfile +++ b/kokoro/linux/dockerfile/test/csharp/Dockerfile @@ -29,7 +29,7 @@ RUN apt-get update && apt-get install -y libunwind8 libicu57 && apt-get clean RUN wget -q https://dot.net/v1/dotnet-install.sh && \ chmod u+x dotnet-install.sh && \ ./dotnet-install.sh --version 2.1.802 && \ - ./dotnet-install.sh --version 3.1.301 && \ + ./dotnet-install.sh --version 5.0.102 && \ ln -s /root/.dotnet/dotnet /usr/local/bin RUN wget -q www.nuget.org/NuGet.exe -O /usr/local/bin/nuget.exe