diff --git a/csharp/install_dotnet_sdk.ps1 b/csharp/install_dotnet_sdk.ps1 index 8bc967c9f4d..c78655cc027 100755 --- a/csharp/install_dotnet_sdk.ps1 +++ b/csharp/install_dotnet_sdk.ps1 @@ -17,4 +17,4 @@ Invoke-WebRequest -Uri $InstallScriptUrl -OutFile $InstallScriptPath # The SDK versions to install should be kept in sync with versions # installed by kokoro/linux/dockerfile/test/csharp/Dockerfile &$InstallScriptPath -Version 2.1.802 -&$InstallScriptPath -Version 3.1.301 +&$InstallScriptPath -Version 5.0.102 diff --git a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj index 73042f358c4..ac8e009adc3 100644 --- a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj +++ b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - netcoreapp3.1 + net5.0 ../../keys/Google.Protobuf.snk true False diff --git a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs index 5b9913b2903..a36a9513c72 100644 --- a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs +++ b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs @@ -42,7 +42,7 @@ namespace Google.Protobuf.Buffers /// ArrayBufferWriter is originally from corefx, and has been contributed to Protobuf /// https://github.com/dotnet/runtime/blob/071da4c41aa808c949a773b92dca6f88de9d11f3/src/libraries/Common/src/System/Buffers/ArrayBufferWriter.cs /// - internal sealed class ArrayBufferWriter : IBufferWriter + internal sealed class TestArrayBufferWriter : IBufferWriter { private T[] _buffer; private int _index; @@ -50,10 +50,10 @@ internal sealed class ArrayBufferWriter : IBufferWriter private const int DefaultInitialBufferSize = 256; /// - /// Creates an instance of an , in which data can be written to, + /// Creates an instance of an , in which data can be written to, /// with the default 
initial capacity. /// - public ArrayBufferWriter() + public TestArrayBufferWriter() { _buffer = new T[0]; _index = 0; @@ -66,14 +66,14 @@ public ArrayBufferWriter() public int? MaxGrowBy { get; set; } /// - /// Creates an instance of an , in which data can be written to, + /// Creates an instance of an , in which data can be written to, /// with an initial capacity specified. /// /// The minimum capacity with which to initialize the underlying buffer. /// /// Thrown when is not positive (i.e. less than or equal to 0). /// - public ArrayBufferWriter(int initialCapacity) + public TestArrayBufferWriter(int initialCapacity) { if (initialCapacity <= 0) throw new ArgumentException(nameof(initialCapacity)); @@ -111,7 +111,7 @@ public ArrayBufferWriter(int initialCapacity) /// Clears the data written to the underlying buffer. /// /// - /// You must clear the before trying to re-use it. + /// You must clear the before trying to re-use it. /// public void Clear() { diff --git a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs index 1e5333c9650..14440098f13 100644 --- a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs @@ -58,7 +58,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); ctx.Flush(); @@ -77,7 +77,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) Assert.AreEqual(data, rawOutput.ToArray()); // IBufferWriter - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); ctx.Flush(); @@ -100,7 +100,7 @@ private static void 
AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt32((uint) value); @@ -115,7 +115,7 @@ private static void AssertWriteVarint(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteUInt64(value); @@ -174,7 +174,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); ctx.Flush(); @@ -190,7 +190,7 @@ private static void AssertWriteLittleEndian32(byte[] data, uint value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); bufferWriter.MaxGrowBy = bufferSize; WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed32(value); @@ -212,7 +212,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); WriteContext.Initialize(bufferWriter, out WriteContext ctx); ctx.WriteFixed64(value); ctx.Flush(); @@ -228,7 +228,7 @@ private static void AssertWriteLittleEndian64(byte[] data, ulong value) output.Flush(); Assert.AreEqual(data, rawOutput.ToArray()); - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new 
/// <summary>
/// Round-trips a short ASCII string through a 5-byte buffer: large enough for the
/// encoded form (one length byte plus three data bytes) and, per the test name,
/// intended to be smaller than the worst-case UTF-8 size estimate for the string.
/// </summary>
[Test]
public void WriteString_AsciiSmall_MaxUtf8SizeExceedsBuffer()
{
    const string text = "ABC";
    var rawBytes = new byte[5];

    var writer = new CodedOutputStream(rawBytes);
    writer.WriteString(text);
    writer.Flush();

    // Read the string back from the same bytes and compare.
    var reader = new CodedInputStream(rawBytes);
    Assert.AreEqual(text, reader.ReadString());
}

/// <summary>
/// Round-trips non-ASCII strings of every length from 1 to 1024 chars.
/// Each string cycles through the ten code points starting at U+2780 (decimal 10112).
/// </summary>
[Test]
public void WriteStringsOfDifferentSizes_Unicode()
{
    for (int charCount = 1; charCount <= 1024; charCount++)
    {
        // Incrementing unicode numbers, repeating every ten chars.
        var chars = new char[charCount];
        for (int k = 0; k < chars.Length; k++)
        {
            chars[k] = (char)(10112 + (k % 10));
        }
        var text = new string(chars);

        var rawBytes = new byte[4096];
        var writer = new CodedOutputStream(rawBytes);
        writer.WriteString(text);
        writer.Flush();

        // Read the string back from the same bytes and compare.
        var reader = new CodedInputStream(rawBytes);
        Assert.AreEqual(text, reader.ReadString());
    }
}
+#if !NET5_0 [TestCase("-1.7977e308")] [TestCase("1.7977e308")] +#endif public void InvalidNumberValue(string json) { AssertThrowsAfter(json); diff --git a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs index da7b4a8c077..22adcaa95d9 100644 --- a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs +++ b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs @@ -141,7 +141,7 @@ public void LegacyGeneratedCodeThrowsWithIBufferWriter() }; var exception = Assert.Throws(() => { - WriteContext.Initialize(new ArrayBufferWriter(), out WriteContext writeCtx); + WriteContext.Initialize(new TestArrayBufferWriter(), out WriteContext writeCtx); ((IBufferMessage)message).InternalWriteTo(ref writeCtx); }); Assert.AreEqual($"Message {typeof(LegacyGeneratedCodeMessageA).Name} doesn't provide the generated method that enables WriteContext-based serialization. You might need to regenerate the generated protobuf code.", exception.Message); diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs index 36a2f022294..65d2fe03954 100644 --- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs +++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs @@ -83,7 +83,7 @@ public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Acti var bytes = message.ToByteArray(); // also serialize using IBufferWriter and check it leads to the same data - var bufferWriter = new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data."); @@ -112,7 +112,7 @@ public static void AssertWritingMessage(IMessage message) Assert.AreEqual(message.CalculateSize(), bytes.Length); // serialize using IBufferWriter and check it leads to the same output - var bufferWriter = 
new ArrayBufferWriter(); + var bufferWriter = new TestArrayBufferWriter(); message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray()); @@ -124,7 +124,7 @@ public static void AssertWritingMessage(IMessage message) // test for different IBufferWriter.GetSpan() segment sizes for (int blockSize = 1; blockSize < 256; blockSize *= 2) { - var segmentedBufferWriter = new ArrayBufferWriter(); + var segmentedBufferWriter = new TestArrayBufferWriter(); segmentedBufferWriter.MaxGrowBy = blockSize; message.WriteTo(segmentedBufferWriter); Assert.AreEqual(bytes, segmentedBufferWriter.WrittenSpan.ToArray()); diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj index f16063418b8..4dbb349307a 100644 --- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj +++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj @@ -1,4 +1,4 @@ - + C# runtime library for Protocol Buffers - Google's data interchange format. @@ -8,7 +8,7 @@ 7.2 Google Inc. 
- netstandard1.1;netstandard2.0;net45 + netstandard1.1;netstandard2.0;net45;net50 true ../../keys/Google.Protobuf.snk true @@ -27,15 +27,23 @@ $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING + + $(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING;GOOGLE_PROTOBUF_SIMD + + - - + + + + + + diff --git a/csharp/src/Google.Protobuf/WritingPrimitives.cs b/csharp/src/Google.Protobuf/WritingPrimitives.cs index cf8fc7ba71f..8beefc54c57 100644 --- a/csharp/src/Google.Protobuf/WritingPrimitives.cs +++ b/csharp/src/Google.Protobuf/WritingPrimitives.cs @@ -32,8 +32,14 @@ using System; using System.Buffers.Binary; +using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if GOOGLE_PROTOBUF_SIMD +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +#endif using System.Security; using System.Text; @@ -45,8 +51,11 @@ namespace Google.Protobuf [SecuritySafeCritical] internal static class WritingPrimitives { - // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) - internal static readonly Encoding Utf8Encoding = Encoding.UTF8; +#if NET5_0 + internal static Encoding Utf8Encoding => Encoding.UTF8; // allows JIT to devirtualize +#else + internal static readonly Encoding Utf8Encoding = Encoding.UTF8; // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.) +#endif #region Writing of values (not including tags) @@ -186,11 +195,7 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st { if (length == value.Length) // Must be all ASCII... { - for (int i = 0; i < length; i++) - { - buffer[state.position + i] = (byte)value[i]; - } - state.position += length; + WriteAsciiStringToBuffer(buffer, ref state, value, length); } else { @@ -208,6 +213,104 @@ public static void WriteString(ref Span buffer, ref WriterInternalState st } } + // Calling this method with non-ASCII content will break. 
/// <summary>
/// Writes an all-ASCII string into <paramref name="buffer"/> starting at
/// <c>state.position</c>, one byte per char, and advances <c>state.position</c>
/// by <paramref name="length"/>.
/// </summary>
/// <remarks>
/// Calling this method with non-ASCII content will break.
/// Content must be verified to be all ASCII before using this method.
/// Writes go through <see cref="Unsafe"/> without bounds checks, so the caller must
/// also guarantee that <paramref name="buffer"/> has at least <paramref name="length"/>
/// bytes available after <c>state.position</c>.
/// </remarks>
/// <param name="buffer">Destination buffer.</param>
/// <param name="state">Writer state; only <c>position</c> is read and updated here.</param>
/// <param name="value">The string to write; every char must be ASCII.</param>
/// <param name="length">Number of bytes to write; must equal <c>value.Length</c>.</param>
private static void WriteAsciiStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value, int length)
{
    // Preconditions the unchecked writes below rely on (debug builds only).
    Debug.Assert(length == value.Length, "ASCII fast path requires one byte per char.");
    Debug.Assert(buffer.Length - state.position >= length, "Destination buffer is too small.");

    ref char sourceChars = ref MemoryMarshal.GetReference(value.AsSpan());
    ref byte destinationBytes = ref MemoryMarshal.GetReference(buffer.Slice(state.position));

    int currentIndex = 0;
    // If 64bit, process 4 chars at a time.
    // The logic inside this check will be elided by JIT in 32bit programs.
    // Need at least 4 chars available to use this optimization.
    if (IntPtr.Size == 8 && length >= 4)
    {
        ref byte sourceBytes = ref Unsafe.As<char, byte>(ref sourceChars);

        // Process 4 chars at a time until there are less than 4 remaining.
        // We already know all characters are ASCII so there is no need to validate the source.
        int lastIndexWhereCanReadFourChars = length - 4;
        do
        {
            // Each char occupies two source bytes, hence the doubled source offset.
            NarrowFourUtf16CharsToAsciiAndWriteToBuffer(
                ref Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex),
                Unsafe.ReadUnaligned<ulong>(ref Unsafe.AddByteOffset(ref sourceBytes, (IntPtr)(currentIndex * 2))));
        } while ((currentIndex += 4) <= lastIndexWhereCanReadFourChars);
    }

    // Process any remaining, 1 char at a time.
    // Avoid bounds checking with ref + Unsafe
    for (; currentIndex < length; currentIndex++)
    {
        Unsafe.Add(ref destinationBytes, currentIndex) = (byte)Unsafe.Add(ref sourceChars, currentIndex);
    }

    state.position += length;
}

// Copied with permission from https://github.com/dotnet/runtime/blob/1cdafd27e4afd2c916af5df949c13f8b373c4335/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs#L1119-L1171
//
/// <summary>
/// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order,
/// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer
/// also in machine-endian order.
/// </summary>
/// <param name="outputBuffer">Reference to the first of the 4 destination bytes.</param>
/// <param name="value">Four UTF-16 code units packed into 64 bits, as read from memory.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value)
{
#if GOOGLE_PROTOBUF_SIMD
    if (Sse2.X64.IsSupported)
    {
        // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
        // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination.

        Vector128<short> vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16();
        Vector128<uint> vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32();
        Unsafe.WriteUnaligned<uint>(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow));
    }
    else if (AdvSimd.IsSupported)
    {
        // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
        // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination.

        Vector128<short> vecWide = Vector128.CreateScalarUnsafe(value).AsInt16();
        Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide);
        Unsafe.WriteUnaligned<uint>(ref outputBuffer, lower.AsUInt32().ToScalar());
    }
    else
#endif
    {
        // Fallback to non-SIMD approach when SIMD is not available.
        // This could happen either because the APIs are not available, or hardware doesn't support it.
        // Processing 4 chars at a time in this fallback is still faster than casting one char at a time.
        if (BitConverter.IsLittleEndian)
        {
            // The least-significant WORD holds the first char.
            outputBuffer = (byte)value;
            Unsafe.Add(ref outputBuffer, 1) = (byte)(value >> 16);
            Unsafe.Add(ref outputBuffer, 2) = (byte)(value >> 32);
            Unsafe.Add(ref outputBuffer, 3) = (byte)(value >> 48);
        }
        else
        {
            // The least-significant WORD holds the last char.
            Unsafe.Add(ref outputBuffer, 3) = (byte)value;
            Unsafe.Add(ref outputBuffer, 2) = (byte)(value >> 16);
            Unsafe.Add(ref outputBuffer, 1) = (byte)(value >> 32);
            outputBuffer = (byte)(value >> 48);
        }
    }
}
We start with the basic ones require to build protoc # and the C++ build @@ -22,14 +22,18 @@ RUN apt-get update && apt-get install -y \ wget \ && apt-get clean +# Update ca-certificates to fix known buster + .NET 5 issue +# https://github.com/NuGet/Announcements/issues/49 +RUN apt-get update && apt-get install -y ca-certificates && apt-get clean + # dotnet SDK prerequisites -RUN apt-get update && apt-get install -y libunwind8 libicu57 && apt-get clean +RUN apt-get update && apt-get install -y libunwind8 libicu63 && apt-get clean # Install dotnet SDK via install script RUN wget -q https://dot.net/v1/dotnet-install.sh && \ chmod u+x dotnet-install.sh && \ ./dotnet-install.sh --version 2.1.802 && \ - ./dotnet-install.sh --version 3.1.301 && \ + ./dotnet-install.sh --version 5.0.102 && \ ln -s /root/.dotnet/dotnet /usr/local/bin RUN wget -q www.nuget.org/NuGet.exe -O /usr/local/bin/nuget.exe