From 5c241eb381b56fa8c6f6a28356486488da25253a Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 21 Jan 2021 09:09:14 +0100 Subject: [PATCH] Merge pull request #5663 from ladipro/switch-interner-to-span Optimize string building and interning Introduces a new assembly Microsoft.NET.StringTools with string interning functionality and a span-based string builder. Evaluation performance, in terms of memory allocations and time, is in single-digit percentage. Detailed description in https://github.com/dotnet/msbuild/pull/5663 --- MSBuild.SourceBuild.slnf | 3 +- MSBuild.sln | 130 +- eng/Packages.props | 1 + scripts/Deploy-MSBuild.ps1 | 1 + ...Microsoft.Build.Engine.OM.UnitTests.csproj | 4 - .../OpportunisticIntern_Tests.cs | 217 ---- .../BackEnd/BuildManager/BuildManager.cs | 6 +- .../BackEnd/BuildManager/BuildParameters.cs | 4 +- .../Solution/SolutionProjectGenerator.cs | 6 +- src/Build/Definition/Project.cs | 8 +- src/Build/Evaluation/Expander.cs | 353 +++--- src/Build/Evaluation/SemiColonTokenizer.cs | 6 +- src/Build/Globbing/MSBuildGlob.cs | 3 +- src/Build/Instance/TaskRegistry.cs | 4 +- src/Build/Microsoft.Build.csproj | 5 +- src/Directory.Build.targets | 2 +- ...Microsoft.Build.Framework.UnitTests.csproj | 5 +- src/MSBuild/MSBuild.csproj | 4 - src/MSBuildTaskHost/MSBuildTaskHost.csproj | 13 +- .../VS.ExternalAPIs.MSBuild.nuspec | 1 + .../MsBuild.Engine.Corext.nuspec | 4 + src/Package/MSBuild.VSSetup/files.swr | 5 + src/Shared/Constants.cs | 2 - src/Shared/EscapingUtilities.cs | 4 +- src/Shared/IInternable.cs | 341 ----- src/Shared/InterningBinaryReader.cs | 41 +- src/Shared/OpportunisticIntern.cs | 1093 ----------------- src/Shared/PropertyParser.cs | 29 +- src/Shared/ReuseableStringBuilder.cs | 82 +- src/Shared/Traits.cs | 10 - src/Shared/WeakStringCache.Concurrent.cs | 137 --- src/StringTools.Benchmark/Program.cs | 15 + .../SpanBasedStringBuilder_Benchmark.cs | 85 ++ .../StringTools.Benchmark.csproj | 22 + .../InterningTestData.cs | 68 + 
.../SpanBasedStringBuilder_Tests.cs | 154 +++ .../StringTools.UnitTests.csproj | 30 + .../StringTools.UnitTests.net35.csproj | 40 + .../StringTools_Tests.cs | 61 + .../WeakStringCache_Tests.cs | 28 +- src/StringTools/AssemblyInfo.cs | 13 + src/StringTools/InternableString.Simple.cs | 231 ++++ src/StringTools/InternableString.cs | 331 +++++ .../SpanBasedStringBuilder.Simple.cs | 153 +++ src/StringTools/SpanBasedStringBuilder.cs | 261 ++++ src/StringTools/StringTools.cs | 108 ++ src/StringTools/StringTools.csproj | 39 + src/StringTools/StringTools.pkgdef | 7 + src/StringTools/WeakStringCache.Concurrent.cs | 124 ++ .../WeakStringCache.Locking.cs | 12 +- .../WeakStringCache.cs | 40 +- src/StringTools/WeakStringCacheInterner.cs | 180 +++ src/Tasks/ListOperators/FindUnderPath.cs | 6 +- src/Tasks/Microsoft.Build.Tasks.csproj | 13 +- .../Microsoft.Build.UnitTests.Shared.csproj | 12 - .../Microsoft.Build.Utilities.csproj | 15 +- 56 files changed, 2361 insertions(+), 2211 deletions(-) delete mode 100644 src/Build.UnitTests/OpportunisticIntern_Tests.cs delete mode 100644 src/Shared/IInternable.cs delete mode 100644 src/Shared/OpportunisticIntern.cs delete mode 100644 src/Shared/WeakStringCache.Concurrent.cs create mode 100644 src/StringTools.Benchmark/Program.cs create mode 100644 src/StringTools.Benchmark/SpanBasedStringBuilder_Benchmark.cs create mode 100644 src/StringTools.Benchmark/StringTools.Benchmark.csproj create mode 100644 src/StringTools.UnitTests/InterningTestData.cs create mode 100644 src/StringTools.UnitTests/SpanBasedStringBuilder_Tests.cs create mode 100644 src/StringTools.UnitTests/StringTools.UnitTests.csproj create mode 100644 src/StringTools.UnitTests/StringTools.UnitTests.net35.csproj create mode 100644 src/StringTools.UnitTests/StringTools_Tests.cs rename src/{Build.UnitTests => StringTools.UnitTests}/WeakStringCache_Tests.cs (87%) create mode 100644 src/StringTools/AssemblyInfo.cs create mode 100644 src/StringTools/InternableString.Simple.cs create mode 
100644 src/StringTools/InternableString.cs create mode 100644 src/StringTools/SpanBasedStringBuilder.Simple.cs create mode 100644 src/StringTools/SpanBasedStringBuilder.cs create mode 100644 src/StringTools/StringTools.cs create mode 100644 src/StringTools/StringTools.csproj create mode 100644 src/StringTools/StringTools.pkgdef create mode 100644 src/StringTools/WeakStringCache.Concurrent.cs rename src/{MSBuildTaskHost => StringTools}/WeakStringCache.Locking.cs (93%) rename src/{Shared => StringTools}/WeakStringCache.cs (83%) create mode 100644 src/StringTools/WeakStringCacheInterner.cs diff --git a/MSBuild.SourceBuild.slnf b/MSBuild.SourceBuild.slnf index 48bd4ec0387..d69d4de7ab3 100644 --- a/MSBuild.SourceBuild.slnf +++ b/MSBuild.SourceBuild.slnf @@ -7,7 +7,8 @@ "src\\MSBuild\\MSBuild.csproj", "src\\Package\\Localization\\Localization.csproj", "src\\Tasks\\Microsoft.Build.Tasks.csproj", - "src\\Utilities\\Microsoft.Build.Utilities.csproj" + "src\\Utilities\\Microsoft.Build.Utilities.csproj", + "src\\StringTools\\StringTools.csproj" ] } } \ No newline at end of file diff --git a/MSBuild.sln b/MSBuild.sln index f58cad8b0d6..74de884bad7 100644 --- a/MSBuild.sln +++ b/MSBuild.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 -VisualStudioVersion = 16.0.30320.27 +VisualStudioVersion = 16.0.30413.136 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{4900B3B8-4310-4D5B-B1F7-2FDF9199765F}" ProjectSection(SolutionItems) = preProject @@ -65,10 +65,18 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MSBuild.Engine.Corext", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MSBuild.Bootstrap", "src\MSBuild.Bootstrap\MSBuild.Bootstrap.csproj", "{CEAEE4FE-9298-443B-AFC5-0F72472484B6}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StringTools", "src\StringTools\StringTools.csproj", 
"{639C178E-368F-4384-869E-7C6D18B4CC1F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StringTools.UnitTests", "src\StringTools.UnitTests\StringTools.UnitTests.csproj", "{A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StringTools.UnitTests.net35", "src\StringTools.UnitTests\StringTools.UnitTests.net35.csproj", "{D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}" +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Build.UnGAC", "src\Package\Microsoft.Build.UnGAC\Microsoft.Build.UnGAC.csproj", "{B60173F0-F9F0-4688-9DF8-9ADDD57BD45F}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ProjectCachePlugin", "src\Samples\ProjectCachePlugin\ProjectCachePlugin.csproj", "{F47E1A0A-7D81-40CF-B8B3-A0F4B5ADE943}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StringTools.Benchmark", "src\StringTools.Benchmark\StringTools.Benchmark.csproj", "{65749C80-47E7-42FE-B441-7A86289D46AA}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -838,6 +846,96 @@ Global {CEAEE4FE-9298-443B-AFC5-0F72472484B6}.Release-MONO|x64.Build.0 = Release-MONO|x64 {CEAEE4FE-9298-443B-AFC5-0F72472484B6}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU {CEAEE4FE-9298-443B-AFC5-0F72472484B6}.Release-MONO|x86.Build.0 = Release-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|x64.ActiveCfg = Debug|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|x64.Build.0 = Debug|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|x86.ActiveCfg = Debug|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug|x86.Build.0 = Debug|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|Any CPU.ActiveCfg = Debug-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|Any 
CPU.Build.0 = Debug-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|x64.ActiveCfg = Debug-MONO|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|x64.Build.0 = Debug-MONO|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|x86.ActiveCfg = Debug-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Debug-MONO|x86.Build.0 = Debug-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|Any CPU.ActiveCfg = MachineIndependent|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|Any CPU.Build.0 = MachineIndependent|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|x64.ActiveCfg = MachineIndependent|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|x64.Build.0 = MachineIndependent|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|x86.ActiveCfg = MachineIndependent|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.MachineIndependent|x86.Build.0 = MachineIndependent|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|Any CPU.Build.0 = Release|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|x64.ActiveCfg = Release|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|x64.Build.0 = Release|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|x86.ActiveCfg = Release|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release|x86.Build.0 = Release|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|Any CPU.ActiveCfg = Release-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|Any CPU.Build.0 = Release-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|x64.ActiveCfg = Release-MONO|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|x64.Build.0 = Release-MONO|x64 + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU + {639C178E-368F-4384-869E-7C6D18B4CC1F}.Release-MONO|x86.Build.0 = Release-MONO|Any 
CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|x64.ActiveCfg = Debug|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|x64.Build.0 = Debug|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|x86.ActiveCfg = Debug|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug|x86.Build.0 = Debug|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|Any CPU.ActiveCfg = Debug-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|Any CPU.Build.0 = Debug-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|x64.ActiveCfg = Debug-MONO|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|x64.Build.0 = Debug-MONO|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|x86.ActiveCfg = Debug-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Debug-MONO|x86.Build.0 = Debug-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|Any CPU.ActiveCfg = MachineIndependent|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|Any CPU.Build.0 = MachineIndependent|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|x64.ActiveCfg = MachineIndependent|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|x64.Build.0 = MachineIndependent|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|x86.ActiveCfg = MachineIndependent|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.MachineIndependent|x86.Build.0 = MachineIndependent|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|Any CPU.Build.0 = Release|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|x64.ActiveCfg = Release|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|x64.Build.0 = Release|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|x86.ActiveCfg = Release|Any CPU + 
{A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release|x86.Build.0 = Release|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|Any CPU.ActiveCfg = Release-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|Any CPU.Build.0 = Release-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|x64.ActiveCfg = Release-MONO|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|x64.Build.0 = Release-MONO|x64 + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU + {A1FF7E97-F98F-4C5C-AD09-0E1CF4A7A4DB}.Release-MONO|x86.Build.0 = Release-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|x64.ActiveCfg = Debug|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|x64.Build.0 = Debug|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|x86.ActiveCfg = Debug|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug|x86.Build.0 = Debug|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|Any CPU.ActiveCfg = Debug-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|Any CPU.Build.0 = Debug-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|x64.ActiveCfg = Debug-MONO|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|x64.Build.0 = Debug-MONO|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|x86.ActiveCfg = Debug-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Debug-MONO|x86.Build.0 = Debug-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|Any CPU.ActiveCfg = MachineIndependent|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|Any CPU.Build.0 = MachineIndependent|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|x64.ActiveCfg = MachineIndependent|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|x64.Build.0 = MachineIndependent|x64 + 
{D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|x86.ActiveCfg = MachineIndependent|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.MachineIndependent|x86.Build.0 = MachineIndependent|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|Any CPU.Build.0 = Release|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|x64.ActiveCfg = Release|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|x64.Build.0 = Release|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|x86.ActiveCfg = Release|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release|x86.Build.0 = Release|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|Any CPU.ActiveCfg = Release-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|Any CPU.Build.0 = Release-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|x64.ActiveCfg = Release-MONO|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|x64.Build.0 = Release-MONO|x64 + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU + {D05E5FAF-3E05-48D2-8DEF-FD1A18EB1349}.Release-MONO|x86.Build.0 = Release-MONO|Any CPU {B60173F0-F9F0-4688-9DF8-9ADDD57BD45F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B60173F0-F9F0-4688-9DF8-9ADDD57BD45F}.Debug|Any CPU.Build.0 = Debug|Any CPU {B60173F0-F9F0-4688-9DF8-9ADDD57BD45F}.Debug|x64.ActiveCfg = Debug|x64 @@ -898,6 +996,36 @@ Global {F47E1A0A-7D81-40CF-B8B3-A0F4B5ADE943}.Release-MONO|x64.Build.0 = Release-MONO|x64 {F47E1A0A-7D81-40CF-B8B3-A0F4B5ADE943}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU {F47E1A0A-7D81-40CF-B8B3-A0F4B5ADE943}.Release-MONO|x86.Build.0 = Release-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|x64.ActiveCfg = Debug|x64 + 
{65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|x64.Build.0 = Debug|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|x86.ActiveCfg = Debug|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug|x86.Build.0 = Debug|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|Any CPU.ActiveCfg = Debug-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|Any CPU.Build.0 = Debug-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|x64.ActiveCfg = Debug-MONO|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|x64.Build.0 = Debug-MONO|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|x86.ActiveCfg = Debug-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Debug-MONO|x86.Build.0 = Debug-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|Any CPU.ActiveCfg = MachineIndependent|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|Any CPU.Build.0 = MachineIndependent|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|x64.ActiveCfg = MachineIndependent|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|x64.Build.0 = MachineIndependent|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|x86.ActiveCfg = MachineIndependent|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.MachineIndependent|x86.Build.0 = MachineIndependent|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|Any CPU.Build.0 = Release|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|x64.ActiveCfg = Release|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|x64.Build.0 = Release|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|x86.ActiveCfg = Release|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release|x86.Build.0 = Release|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|Any CPU.ActiveCfg = Release-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|Any CPU.Build.0 = 
Release-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|x64.ActiveCfg = Release-MONO|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|x64.Build.0 = Release-MONO|x64 + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|x86.ActiveCfg = Release-MONO|Any CPU + {65749C80-47E7-42FE-B441-7A86289D46AA}.Release-MONO|x86.Build.0 = Release-MONO|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/eng/Packages.props b/eng/Packages.props index ebd0d0e551f..a0fb8ec1032 100644 --- a/eng/Packages.props +++ b/eng/Packages.props @@ -1,5 +1,6 @@ + diff --git a/scripts/Deploy-MSBuild.ps1 b/scripts/Deploy-MSBuild.ps1 index 125e9447ca5..47eec2ccff8 100644 --- a/scripts/Deploy-MSBuild.ps1 +++ b/scripts/Deploy-MSBuild.ps1 @@ -58,6 +58,7 @@ $filesToCopyToBin = @( FileToCopy "$bootstrapBinDirectory\Microsoft.Build.Framework.dll" FileToCopy "$bootstrapBinDirectory\Microsoft.Build.Tasks.Core.dll" FileToCopy "$bootstrapBinDirectory\Microsoft.Build.Utilities.Core.dll" + FileToCopy "$bootstrapBinDirectory\Microsoft.NET.StringTools.dll" FileToCopy "$bootstrapBinDirectory\en\Microsoft.Build.resources.dll" "en" FileToCopy "$bootstrapBinDirectory\en\Microsoft.Build.Tasks.Core.resources.dll" "en" diff --git a/src/Build.OM.UnitTests/Microsoft.Build.Engine.OM.UnitTests.csproj b/src/Build.OM.UnitTests/Microsoft.Build.Engine.OM.UnitTests.csproj index 5ec78a92068..18ad711bac4 100644 --- a/src/Build.OM.UnitTests/Microsoft.Build.Engine.OM.UnitTests.csproj +++ b/src/Build.OM.UnitTests/Microsoft.Build.Engine.OM.UnitTests.csproj @@ -99,10 +99,6 @@ True - - - - true diff --git a/src/Build.UnitTests/OpportunisticIntern_Tests.cs b/src/Build.UnitTests/OpportunisticIntern_Tests.cs deleted file mode 100644 index c47f751c349..00000000000 --- a/src/Build.UnitTests/OpportunisticIntern_Tests.cs +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. 
See LICENSE file in the project root for full license information. - -using System; -using System.Text; -using Microsoft.Build.Shared; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.Build.UnitTests -{ - public abstract class OpportunisticInternTestBase : IDisposable - { - protected TestEnvironment _env; - - public void Dispose() - { - _env.Dispose(); - } - - protected OpportunisticInternTestBase(ITestOutputHelper testOutput) - { - _env = TestEnvironment.Create(testOutput); - } - - private static bool IsInternable(IInternable internable) - { - string i1 = OpportunisticIntern.InternableToString(internable); - string i2 = OpportunisticIntern.InternableToString(internable); - Assert.Equal(i1, i2); // No matter what, the same string value should return. - return Object.ReferenceEquals(i1, i2); - } - - private static void AssertInternable(IInternable internable) - { - Assert.True(IsInternable(internable)); - } - - private static void AssertInternable(StringBuilder sb) - { - AssertInternable(new StringBuilderInternTarget(sb)); - } - - private static string AssertInternable(char[] ch, int startIndex, int count) - { - var target = new CharArrayInternTarget(ch, startIndex, count); - AssertInternable(target); - Assert.Equal(target.Length, count); - - return target.ExpensiveConvertToString(); - } - - private static void AssertInternable(string value) - { - AssertInternable(new StringBuilder(value)); - AssertInternable(value.ToCharArray(), 0, value.ToCharArray().Length); - } - - private static void AssertNotInternable(IInternable internable) - { - Assert.False(IsInternable(internable)); - } - - private static void AssertNotInternable(StringBuilder sb) - { - AssertNotInternable(new StringBuilderInternTarget(sb)); - } - - private static void AssertNotInternable(char[] ch) - { - AssertNotInternable(new CharArrayInternTarget(ch, ch.Length)); - } - - protected static void AssertNotInternable(string value) - { - AssertNotInternable(new StringBuilder(value)); - 
AssertNotInternable(value.ToCharArray()); - } - - /// - /// Test interning segment of char array - /// - [Fact] - public void SubArray() - { - var result = AssertInternable(new char[] { 'a', 't', 'r', 'u', 'e' }, 1, 4); - - Assert.Equal("true", result); - } - - /// - /// Test interning segment of char array - /// - [Fact] - public void SubArray2() - { - var result = AssertInternable(new char[] { 'a', 't', 'r', 'u', 'e', 'x' }, 1, 4); - - Assert.Equal("true", result); - } - - /// - /// Unique strings should not be interned - /// - [Fact] - public void NonInternableDummyGlobalVariable() - { - AssertNotInternable($"{MSBuildConstants.MSBuildDummyGlobalPropertyHeader}{new string('1', 100)}"); - } - - /// - /// This is the list of hard-coded interns. They should report interned even though they are too small for normal interning. - /// - [Fact] - public void KnownInternableTinyStrings() - { - AssertInternable("C#"); - AssertInternable("F#"); - AssertInternable("VB"); - AssertInternable("True"); - AssertInternable("TRUE"); - AssertInternable("Copy"); - AssertInternable("v4.0"); - AssertInternable("true"); - AssertInternable("FALSE"); - AssertInternable("false"); - AssertInternable("Debug"); - AssertInternable("Build"); - AssertInternable("''!=''"); - AssertInternable("AnyCPU"); - AssertInternable("Library"); - AssertInternable("MSBuild"); - AssertInternable("Release"); - AssertInternable("ResolveAssemblyReference"); - } - - /// - /// Test a set of strings that are similar to each other - /// - [Fact] - public void InternableDifferingOnlyByNthCharacter() - { - string test = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890!@#$%^&*()_+ABCDEFGHIJKLMNOPQRSTUVabcdefghijklmnopqrstuvwxyz0150"; - for (int i = 0; i < test.Length; ++i) - { - string mutated = test.Substring(0, i) + " " + test.Substring(i + 1); - AssertInternable(mutated); - } - } - - /// - /// Test The empty string - /// - [Fact] - public void StringDotEmpty() - { - AssertInternable(String.Empty); - } 
- - /// - /// Test an empty string. - /// - [Fact] - public void DoubleDoubleQuotes() - { - AssertInternable(""); - } - } - - /// - /// Tests the new (default) implementation of OpportunisticIntern. - /// - public class OpportunisticIntern_Tests : OpportunisticInternTestBase - { - public OpportunisticIntern_Tests(ITestOutputHelper testOutput) - : base(testOutput) - { - OpportunisticIntern.ResetForTests(); - } - } - - /// - /// Tests the legacy implementation of OpportunisticIntern. - /// - public class OpportunisticInternLegacy_Tests : OpportunisticInternTestBase - { - public OpportunisticInternLegacy_Tests(ITestOutputHelper testOutput) - : base(testOutput) - { - _env.SetEnvironmentVariable("MSBuildUseLegacyStringInterner", "1"); - OpportunisticIntern.ResetForTests(); - } - - /// - /// The legacy implementation does not intern tiny strings unless they are on the hard-coded list. - /// - [Fact] - public void NonInternableTinyString() - { - AssertNotInternable("1234"); - } - } - - /// - /// Tests the legacy implementation of OpportunisticIntern with simple concurrency enabled. 
- /// - public class OpportunisticInternLegacySimpleConcurrecy_Tests : OpportunisticInternTestBase - { - public OpportunisticInternLegacySimpleConcurrecy_Tests(ITestOutputHelper testOutput) - : base(testOutput) - { - _env.SetEnvironmentVariable("MSBuildUseLegacyStringInterner", "1"); - _env.SetEnvironmentVariable("MSBuildUseSimpleInternConcurrency", "1"); - OpportunisticIntern.ResetForTests(); - } - } -} diff --git a/src/Build/BackEnd/BuildManager/BuildManager.cs b/src/Build/BackEnd/BuildManager/BuildManager.cs index 29c2e7c16d6..6327090ecca 100644 --- a/src/Build/BackEnd/BuildManager/BuildManager.cs +++ b/src/Build/BackEnd/BuildManager/BuildManager.cs @@ -34,6 +34,8 @@ using ForwardingLoggerRecord = Microsoft.Build.Logging.ForwardingLoggerRecord; using LoggerDescription = Microsoft.Build.Logging.LoggerDescription; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Execution { /// @@ -415,7 +417,7 @@ public void BeginBuild(BuildParameters parameters) if (BuildParameters.DumpOpportunisticInternStats) { - OpportunisticIntern.Instance.EnableStatisticsGathering(); + Strings.EnableDiagnostics(); } _overallBuildSuccess = true; @@ -860,7 +862,7 @@ public void EndBuild() if (BuildParameters.DumpOpportunisticInternStats) { - OpportunisticIntern.Instance.ReportStatistics(); + Console.WriteLine(Strings.CreateDiagnosticReport()); } } } diff --git a/src/Build/BackEnd/BuildManager/BuildParameters.cs b/src/Build/BackEnd/BuildManager/BuildParameters.cs index 1129274ba2f..1259648e255 100644 --- a/src/Build/BackEnd/BuildManager/BuildParameters.cs +++ b/src/Build/BackEnd/BuildManager/BuildParameters.cs @@ -70,7 +70,7 @@ public class BuildParameters : ITranslatable /// /// Indicates if we should dump string interning stats. /// - private static bool? s_dumpOpportunisticInternStats; + private static bool? s_dumpStringInterningStats; /// /// Indicates if we should debug the expander. 
@@ -647,7 +647,7 @@ internal static bool WarnOnUninitializedProperty /// Indicates whether we should dump string interning stats /// internal static bool DumpOpportunisticInternStats => GetStaticBoolVariableOrDefault( - "MSBUILDDUMPOPPORTUNISTICINTERNSTATS", ref s_dumpOpportunisticInternStats, false); + "MSBUILDDUMPOPPORTUNISTICINTERNSTATS", ref s_dumpStringInterningStats, false); /// /// Indicates whether we should dump debugging information about the expander diff --git a/src/Build/Construction/Solution/SolutionProjectGenerator.cs b/src/Build/Construction/Solution/SolutionProjectGenerator.cs index e935b5cc18d..d26fca3b0d3 100644 --- a/src/Build/Construction/Solution/SolutionProjectGenerator.cs +++ b/src/Build/Construction/Solution/SolutionProjectGenerator.cs @@ -28,6 +28,8 @@ using Microsoft.Build.Execution; using Microsoft.Build.Utilities; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Construction { /// @@ -1097,7 +1099,7 @@ private static string GetConfigurationAndPlatformPropertiesString(ProjectConfigu /// private static string GetPropertiesAttributeForDirectMSBuildTask(ProjectConfigurationInSolution projectConfiguration) { - string directProjectProperties = OpportunisticIntern.InternStringIfPossible(String.Join(";", GetConfigurationAndPlatformPropertiesString(projectConfiguration), SolutionProperties)); + string directProjectProperties = Strings.WeakIntern(String.Join(";", GetConfigurationAndPlatformPropertiesString(projectConfiguration), SolutionProperties)); return directProjectProperties; } @@ -1343,7 +1345,7 @@ private static void AddProjectBuildTask(ProjectInstance traversalProject, Projec /// private void AddMetaprojectBuildTask(ProjectInSolution project, ProjectTargetInstance target, string targetToBuild, string outputItem) { - ProjectTaskInstance task = target.AddTask("MSBuild", OpportunisticIntern.InternStringIfPossible("'%(ProjectReference.Identity)' == '" + GetMetaprojectName(project) + "'"), String.Empty); + ProjectTaskInstance 
task = target.AddTask("MSBuild", Strings.WeakIntern("'%(ProjectReference.Identity)' == '" + GetMetaprojectName(project) + "'"), String.Empty); task.SetParameter("Projects", "@(ProjectReference)"); if (targetToBuild != null) diff --git a/src/Build/Definition/Project.cs b/src/Build/Definition/Project.cs index 67a17a6066a..285c4cc592a 100644 --- a/src/Build/Definition/Project.cs +++ b/src/Build/Definition/Project.cs @@ -260,11 +260,11 @@ private Project(ProjectRootElement xml, IDictionary globalProper ErrorUtilities.VerifyThrowArgumentLengthIfNotNull(toolsVersion, nameof(toolsVersion)); ErrorUtilities.VerifyThrowArgumentNull(projectCollection, nameof(projectCollection)); ProjectCollection = projectCollection; - var defailtImplementation = new ProjectImpl(this, xml, globalProperties, toolsVersion, subToolsetVersion, loadSettings, evaluationContext); - implementationInternal = (IProjectLinkInternal)defailtImplementation; - implementation = defailtImplementation; + var defaultImplementation = new ProjectImpl(this, xml, globalProperties, toolsVersion, subToolsetVersion, loadSettings, evaluationContext); + implementationInternal = (IProjectLinkInternal)defaultImplementation; + implementation = defaultImplementation; - defailtImplementation.Initialize(globalProperties, toolsVersion, subToolsetVersion, loadSettings, evaluationContext); + defaultImplementation.Initialize(globalProperties, toolsVersion, subToolsetVersion, loadSettings, evaluationContext); } /// diff --git a/src/Build/Evaluation/Expander.cs b/src/Build/Evaluation/Expander.cs index 33d2c467560..93bb74753de 100644 --- a/src/Build/Evaluation/Expander.cs +++ b/src/Build/Evaluation/Expander.cs @@ -24,6 +24,8 @@ using TaskItem = Microsoft.Build.Execution.ProjectItemInstance.TaskItem; using TaskItemFactory = Microsoft.Build.Execution.ProjectItemInstance.TaskItem.TaskItemFactory; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Evaluation { /// @@ -586,16 +588,12 @@ private static int 
ScanForClosingQuote(char quoteChar, string expression, int in /// Add the argument in the StringBuilder to the arguments list, handling nulls /// appropriately. /// - private static void AddArgument(List arguments, ReuseableStringBuilder argumentBuilder) + private static void AddArgument(List arguments, SpanBasedStringBuilder argumentBuilder) { - // If we don't have something that can be treated as an argument - // then we should treat it as a null so that passing nulls - // becomes possible through an empty argument between commas. - ErrorUtilities.VerifyThrowArgumentNull(argumentBuilder, nameof(argumentBuilder)); - // we reached the end of an argument, add the builder's final result - // to our arguments. - string argValue = OpportunisticIntern.InternableToString(argumentBuilder).Trim(); + // to our arguments. + argumentBuilder.Trim(); + string argValue = argumentBuilder.ToString(); // We support passing of null through the argument constant value null if (String.Equals("null", argValue, StringComparison.OrdinalIgnoreCase)) @@ -642,69 +640,81 @@ private static string[] ExtractFunctionArguments(IElementLocation elementLocatio List arguments = new List(); - // With the reuseable string builder, there's no particular need to initialize the length as it will already have grown. - using (var argumentBuilder = new ReuseableStringBuilder()) + using SpanBasedStringBuilder argumentBuilder = Strings.GetSpanBasedStringBuilder(); + int? argumentStartIndex = null; + + // We iterate over the string in the for loop below. When we find an argument, instead of adding it to the argument + // builder one-character-at-a-time, we remember the start index and then call this function when we find the end of + // the argument. This appends the entire {start, end} span to the builder in one call. 
+ void FlushCurrentArgumentToArgumentBuilder(int argumentEndIndex) { - unsafe + if (argumentStartIndex.HasValue) { - fixed (char* argumentsContent = argumentsString) - { - // Iterate over the contents of the arguments extracting the - // the individual arguments as we go - for (int n = 0; n < argumentsContentLength; n++) - { - // We found a property expression.. skip over all of it. - if ((n < argumentsContentLength - 1) && (argumentsContent[n] == '$' && argumentsContent[n + 1] == '(')) - { - int nestedPropertyStart = n; - n += 2; // skip over the opening '$(' - - // Scan for the matching closing bracket, skipping any nested ones - n = ScanForClosingParenthesis(argumentsString, n); + argumentBuilder.Append(argumentsString, argumentStartIndex.Value, argumentEndIndex - argumentStartIndex.Value); + argumentStartIndex = null; + } + } - if (n == -1) - { - ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedParenthesis")); - } + // Iterate over the contents of the arguments extracting the + // the individual arguments as we go + for (int n = 0; n < argumentsContentLength; n++) + { + // We found a property expression.. skip over all of it. 
+ if ((n < argumentsContentLength - 1) && (argumentsString[n] == '$' && argumentsString[n + 1] == '(')) + { + int nestedPropertyStart = n; + n += 2; // skip over the opening '$(' - argumentBuilder.Append(argumentsString, nestedPropertyStart, (n - nestedPropertyStart) + 1); - } - else if (argumentsContent[n] == '`' || argumentsContent[n] == '"' || argumentsContent[n] == '\'') - { - int quoteStart = n; - n++; // skip over the opening quote + // Scan for the matching closing bracket, skipping any nested ones + n = ScanForClosingParenthesis(argumentsString, n); - n = ScanForClosingQuote(argumentsString[quoteStart], argumentsString, n); + if (n == -1) + { + ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedParenthesis")); + } - if (n == -1) - { - ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedQuote")); - } + FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: nestedPropertyStart); + argumentBuilder.Append(argumentsString, nestedPropertyStart, (n - nestedPropertyStart) + 1); + } + else if (argumentsString[n] == '`' || argumentsString[n] == '"' || argumentsString[n] == '\'') + { + int quoteStart = n; + n++; // skip over the opening quote - argumentBuilder.Append(argumentsString, quoteStart, (n - quoteStart) + 1); - } - else if (argumentsContent[n] == ',') - { - // We have reached the end of the current argument, go ahead and add it - // to our list - AddArgument(arguments, argumentBuilder); + n = ScanForClosingQuote(argumentsString[quoteStart], argumentsString, n); - // Clear out the argument builder ready for the next argument - argumentBuilder.Remove(0, argumentBuilder.Length); - } - else - { - argumentBuilder.Append(argumentsContent[n]); - } - } + if (n == -1) + { + 
ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "InvalidFunctionPropertyExpression", expressionFunction, AssemblyResources.GetString("InvalidFunctionPropertyExpressionDetailMismatchedQuote")); } + + FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: quoteStart); + argumentBuilder.Append(argumentsString, quoteStart, (n - quoteStart) + 1); } + else if (argumentsString[n] == ',') + { + FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: n); + + // We have reached the end of the current argument, go ahead and add it + // to our list + AddArgument(arguments, argumentBuilder); - // This will either be the one and only argument, or the last one - // so add it to our list - AddArgument(arguments, argumentBuilder); + // Clear out the argument builder ready for the next argument + argumentBuilder.Clear(); + } + else + { + argumentStartIndex ??= n; + } } + // We reached the end of the string but we may have seen the start but not the end of the last (or only) argument so flush it now. + FlushCurrentArgumentToArgumentBuilder(argumentEndIndex: argumentsContentLength); + + // This will either be the one and only argument, or the last one + // so add it to our list + AddArgument(arguments, argumentBuilder); + return arguments.ToArray(); } @@ -766,55 +776,53 @@ internal static string ExpandMetadataLeaveEscaped(string expression, IMetadataTa } // otherwise, run the more complex Regex to find item metadata references not contained in transforms - // With the reuseable string builder, there's no particular need to initialize the length as it will already have grown. 
- using (var finalResultBuilder = new ReuseableStringBuilder()) - { - int start = 0; - MetadataMatchEvaluator matchEvaluator = new MetadataMatchEvaluator(metadata, options); + using SpanBasedStringBuilder finalResultBuilder = Strings.GetSpanBasedStringBuilder(); - if (itemVectorExpressions != null) - { - // Move over the expression, skipping those that have been recognized as an item vector expression - // Anything other than an item vector expression we want to expand bare metadata in. - for (int n = 0; n < itemVectorExpressions.Count; n++) - { - string vectorExpression = itemVectorExpressions[n].Value; - - // Extract the part of the expression that appears before the item vector expression - // e.g. the ABC in ABC@(foo->'%(FullPath)') - string subExpressionToReplaceIn = expression.Substring(start, itemVectorExpressions[n].Index - start); - string replacementResult = RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(subExpressionToReplaceIn, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata)); + int start = 0; + MetadataMatchEvaluator matchEvaluator = new MetadataMatchEvaluator(metadata, options); - // Append the metadata replacement - finalResultBuilder.Append(replacementResult); + if (itemVectorExpressions != null) + { + // Move over the expression, skipping those that have been recognized as an item vector expression + // Anything other than an item vector expression we want to expand bare metadata in. 
+ for (int n = 0; n < itemVectorExpressions.Count; n++) + { + string vectorExpression = itemVectorExpressions[n].Value; - // Expand any metadata that appears in the item vector expression's separator - if (itemVectorExpressions[n].Separator != null) - { - vectorExpression = RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(itemVectorExpressions[n].Value, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata), -1, itemVectorExpressions[n].SeparatorStart); - } + // Extract the part of the expression that appears before the item vector expression + // e.g. the ABC in ABC@(foo->'%(FullPath)') + string subExpressionToReplaceIn = expression.Substring(start, itemVectorExpressions[n].Index - start); + string replacementResult = RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(subExpressionToReplaceIn, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata)); - // Append the item vector expression as is - // e.g. the @(foo->'%(FullPath)') in ABC@(foo->'%(FullPath)') - finalResultBuilder.Append(vectorExpression); + // Append the metadata replacement + finalResultBuilder.Append(replacementResult); - // Move onto the next part of the expression that isn't an item vector expression - start = (itemVectorExpressions[n].Index + itemVectorExpressions[n].Length); + // Expand any metadata that appears in the item vector expression's separator + if (itemVectorExpressions[n].Separator != null) + { + vectorExpression = RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(itemVectorExpressions[n].Value, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata), -1, itemVectorExpressions[n].SeparatorStart); } - } - // If there's anything left after the last item vector expression - // then we need to metadata replace and then append that - if (start < expression.Length) - { - string subExpressionToReplaceIn = expression.Substring(start); - string replacementResult = 
RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(subExpressionToReplaceIn, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata)); + // Append the item vector expression as is + // e.g. the @(foo->'%(FullPath)') in ABC@(foo->'%(FullPath)') + finalResultBuilder.Append(vectorExpression); - finalResultBuilder.Append(replacementResult); + // Move onto the next part of the expression that isn't an item vector expression + start = (itemVectorExpressions[n].Index + itemVectorExpressions[n].Length); } + } + + // If there's anything left after the last item vector expression + // then we need to metadata replace and then append that + if (start < expression.Length) + { + string subExpressionToReplaceIn = expression.Substring(start); + string replacementResult = RegularExpressions.NonTransformItemMetadataPattern.Value.Replace(subExpressionToReplaceIn, new MatchEvaluator(matchEvaluator.ExpandSingleMetadata)); - result = OpportunisticIntern.InternableToString(finalResultBuilder); + finalResultBuilder.Append(replacementResult); } + + result = finalResultBuilder.ToString(); } // Don't create more strings @@ -1144,34 +1152,32 @@ private static class PropertyExpander // Initialize our output string to empty string. // This method is called very often - of the order of 3,000 times per project. - // With the reuseable string builder, there's no particular need to initialize the length as it will already have grown. 
- using (var result = new ReuseableStringBuilder()) - { - // Append our collected results - if (results != null) - { - // Create a combined result string from the result components that we've gathered - foreach (object component in results) - { - result.Append(FileUtilities.MaybeAdjustFilePath(component.ToString())); - } - } + using SpanBasedStringBuilder result = Strings.GetSpanBasedStringBuilder(); - // Append the last result we collected (it wasn't added to the list) - if (lastResult != null) + // Append our collected results + if (results != null) + { + // Create a combined result string from the result components that we've gathered + foreach (object component in results) { - result.Append(FileUtilities.MaybeAdjustFilePath(lastResult.ToString())); + result.Append(FileUtilities.MaybeAdjustFilePath(component.ToString())); } + } - // And if we couldn't find anymore property tags in the expression, - // so just literally copy the remainder into the result. - if (expression.Length - sourceIndex > 0) - { - result.Append(expression, sourceIndex, expression.Length - sourceIndex); - } + // Append the last result we collected (it wasn't added to the list) + if (lastResult != null) + { + result.Append(FileUtilities.MaybeAdjustFilePath(lastResult.ToString())); + } - return OpportunisticIntern.InternableToString(result); + // And if we couldn't find anymore property tags in the expression, + // so just literally copy the remainder into the result. + if (expression.Length - sourceIndex > 0) + { + result.Append(expression, sourceIndex, expression.Length - sourceIndex); } + + return result.ToString(); } } @@ -1310,51 +1316,53 @@ internal static string ConvertToString(object valueToConvert) { convertedString = (string)valueToConvert; } - else if (valueToConvert is IDictionary) + else if (valueToConvert is IDictionary dictionary) { // If the return type is an IDictionary, then we convert this to // a semi-colon delimited set of A=B pairs. 
// Key and Value are converted to string and escaped - IDictionary dictionary = valueToConvert as IDictionary; - using (var builder = new ReuseableStringBuilder()) + if (dictionary.Count > 0) { + using SpanBasedStringBuilder builder = Strings.GetSpanBasedStringBuilder(); + foreach (DictionaryEntry entry in dictionary) { if (builder.Length > 0) { - builder.Append(';'); + builder.Append(";"); } // convert and escape each key and value in the dictionary entry builder.Append(EscapingUtilities.Escape(ConvertToString(entry.Key))); - builder.Append('='); + builder.Append("="); builder.Append(EscapingUtilities.Escape(ConvertToString(entry.Value))); } - convertedString = OpportunisticIntern.InternableToString(builder); + convertedString = builder.ToString(); + } + else + { + convertedString = string.Empty; } } - else if (valueToConvert is IEnumerable) + else if (valueToConvert is IEnumerable enumerable) { // If the return is enumerable, then we'll convert to semi-colon delimited elements // each of which must be converted, so we'll recurse for each element - using (var builder = new ReuseableStringBuilder()) - { - IEnumerable enumerable = (IEnumerable)valueToConvert; + using SpanBasedStringBuilder builder = Strings.GetSpanBasedStringBuilder(); - foreach (object element in enumerable) + foreach (object element in enumerable) + { + if (builder.Length > 0) { - if (builder.Length > 0) - { - builder.Append(';'); - } - - // we need to convert and escape each element of the array - builder.Append(EscapingUtilities.Escape(ConvertToString(element))); + builder.Append(";"); } - convertedString = OpportunisticIntern.InternableToString(builder); + // we need to convert and escape each element of the array + builder.Append(EscapingUtilities.Escape(ConvertToString(element))); } + + convertedString = builder.ToString(); } else { @@ -1759,18 +1767,16 @@ private static class ItemExpander // a scalar and then create a single item. 
Basically we need this // to be able to convert item lists with user specified separators into properties. string expandedItemVector; - using (var builder = new ReuseableStringBuilder()) - { - brokeEarlyNonEmpty = ExpandExpressionCaptureIntoStringBuilder(expander, expressionCapture, items, elementLocation, builder, options); - - if (brokeEarlyNonEmpty) - { - return null; - } + using SpanBasedStringBuilder builder = Strings.GetSpanBasedStringBuilder(); + brokeEarlyNonEmpty = ExpandExpressionCaptureIntoStringBuilder(expander, expressionCapture, items, elementLocation, builder, options); - expandedItemVector = OpportunisticIntern.InternableToString(builder); + if (brokeEarlyNonEmpty) + { + return null; } + expandedItemVector = builder.ToString(); + result = new List(1); if (expandedItemVector.Length > 0) @@ -1941,38 +1947,36 @@ internal static string ExpandItemVectorsIntoString(Expander expander, s return expression; } - using (var builder = new ReuseableStringBuilder()) + using SpanBasedStringBuilder builder = Strings.GetSpanBasedStringBuilder(); + // As we walk through the matches, we need to copy out the original parts of the string which + // are not covered by the match. This preserves original behavior which did not trim whitespace + // from between separators. + int lastStringIndex = 0; + for (int i = 0; i < matches.Count; i++) { - // As we walk through the matches, we need to copy out the original parts of the string which - // are not covered by the match. This preserves original behavior which did not trim whitespace - // from between separators. 
- int lastStringIndex = 0; - for (int i = 0; i < matches.Count; i++) + if (matches[i].Index > lastStringIndex) { - if (matches[i].Index > lastStringIndex) - { - if ((options & ExpanderOptions.BreakOnNotEmpty) != 0) - { - return null; - } - - builder.Append(expression, lastStringIndex, matches[i].Index - lastStringIndex); - } - - bool brokeEarlyNonEmpty = ExpandExpressionCaptureIntoStringBuilder(expander, matches[i], items, elementLocation, builder, options); - - if (brokeEarlyNonEmpty) + if ((options & ExpanderOptions.BreakOnNotEmpty) != 0) { return null; } - lastStringIndex = matches[i].Index + matches[i].Length; + builder.Append(expression, lastStringIndex, matches[i].Index - lastStringIndex); } - builder.Append(expression, lastStringIndex, expression.Length - lastStringIndex); + bool brokeEarlyNonEmpty = ExpandExpressionCaptureIntoStringBuilder(expander, matches[i], items, elementLocation, builder, options); - return OpportunisticIntern.InternableToString(builder); + if (brokeEarlyNonEmpty) + { + return null; + } + + lastStringIndex = matches[i].Index + matches[i].Length; } + + builder.Append(expression, lastStringIndex, expression.Length - lastStringIndex); + + return builder.ToString(); } /// @@ -2019,7 +2023,7 @@ private static Stack> PrepareTransformStackFromMatch(IEl } /// - /// Expand the match provided into a string, and append that to the provided string builder. + /// Expand the match provided into a string, and append that to the provided InternableString. /// Returns true if ExpanderOptions.BreakOnNotEmpty was passed, expression was going to be non-empty, and so it broke out early. /// /// Type of source items. 
@@ -2028,7 +2032,7 @@ private static Stack> PrepareTransformStackFromMatch(IEl ExpressionShredder.ItemExpressionCapture capture, IItemProvider evaluatedItems, IElementLocation elementLocation, - ReuseableStringBuilder builder, + SpanBasedStringBuilder builder, ExpanderOptions options ) where S : class, IItem @@ -2069,12 +2073,11 @@ ExpanderOptions options } } builder.Append(item.Key); - builder.Append(';'); + if (i < itemsFromCapture.Count - 1) + { + builder.Append(";"); + } } - - // Remove trailing separator if we added one - if (itemsFromCapture.Count > 0) - builder.Length--; return false; } diff --git a/src/Build/Evaluation/SemiColonTokenizer.cs b/src/Build/Evaluation/SemiColonTokenizer.cs index 3d3c0791baa..625f2ae0487 100644 --- a/src/Build/Evaluation/SemiColonTokenizer.cs +++ b/src/Build/Evaluation/SemiColonTokenizer.cs @@ -1,9 +1,12 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+using System; using System.Collections; using System.Collections.Generic; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Evaluation { /// @@ -145,8 +148,7 @@ private string GetExpressionSubstring(int startIndex, int length) } if (startIndex < endIndex) { - var target = new SubstringInternTarget(_expression, startIndex, endIndex - startIndex); - return OpportunisticIntern.InternableToString(target); + return Strings.WeakIntern(_expression.AsSpan(startIndex, endIndex - startIndex)); } return null; } diff --git a/src/Build/Globbing/MSBuildGlob.cs b/src/Build/Globbing/MSBuildGlob.cs index 6eb35e81b52..4b03541b3df 100644 --- a/src/Build/Globbing/MSBuildGlob.cs +++ b/src/Build/Globbing/MSBuildGlob.cs @@ -7,6 +7,7 @@ using System.Text.RegularExpressions; using Microsoft.Build.Collections; using Microsoft.Build.Shared; +using Microsoft.NET.StringTools; namespace Microsoft.Build.Globbing { @@ -176,7 +177,7 @@ public static MSBuildGlob Parse(string globRoot, string fileSpec) globRoot = Directory.GetCurrentDirectory(); } - globRoot = OpportunisticIntern.InternStringIfPossible(FileUtilities.NormalizePath(globRoot).WithTrailingSlash()); + globRoot = Strings.WeakIntern(FileUtilities.NormalizePath(globRoot).WithTrailingSlash()); var lazyState = new Lazy(() => { diff --git a/src/Build/Instance/TaskRegistry.cs b/src/Build/Instance/TaskRegistry.cs index ef40362e4eb..a44626cccae 100644 --- a/src/Build/Instance/TaskRegistry.cs +++ b/src/Build/Instance/TaskRegistry.cs @@ -21,6 +21,8 @@ using System.Collections.ObjectModel; using Microsoft.Build.Shared.FileSystem; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Execution { /// @@ -308,7 +310,7 @@ IFileSystem fileSystem if (assemblyFile != null && !Path.IsPathRooted(assemblyFile)) { - assemblyFile = OpportunisticIntern.InternStringIfPossible(Path.Combine(directoryOfImportingFile, assemblyFile)); + assemblyFile = Strings.WeakIntern(Path.Combine(directoryOfImportingFile, assemblyFile)); } if 
(String.Equals(taskFactory, RegisteredTaskRecord.CodeTaskFactory, StringComparison.OrdinalIgnoreCase) || String.Equals(taskFactory, RegisteredTaskRecord.XamlTaskFactory, StringComparison.OrdinalIgnoreCase)) diff --git a/src/Build/Microsoft.Build.csproj b/src/Build/Microsoft.Build.csproj index 35b6e45e851..5ef2f9f7cd8 100644 --- a/src/Build/Microsoft.Build.csproj +++ b/src/Build/Microsoft.Build.csproj @@ -30,6 +30,7 @@ + @@ -357,10 +358,6 @@ - - - - Collections\CopyOnWriteDictionary.cs diff --git a/src/Directory.Build.targets b/src/Directory.Build.targets index 7d4b6537997..d96c264411f 100644 --- a/src/Directory.Build.targets +++ b/src/Directory.Build.targets @@ -57,7 +57,7 @@ - + diff --git a/src/Framework.UnitTests/Microsoft.Build.Framework.UnitTests.csproj b/src/Framework.UnitTests/Microsoft.Build.Framework.UnitTests.csproj index 0d39dc25f6e..b73c360d59d 100644 --- a/src/Framework.UnitTests/Microsoft.Build.Framework.UnitTests.csproj +++ b/src/Framework.UnitTests/Microsoft.Build.Framework.UnitTests.csproj @@ -18,6 +18,7 @@ + @@ -43,10 +44,6 @@ - - - - diff --git a/src/MSBuild/MSBuild.csproj b/src/MSBuild/MSBuild.csproj index a6f0873dac4..112e81f6f5e 100644 --- a/src/MSBuild/MSBuild.csproj +++ b/src/MSBuild/MSBuild.csproj @@ -130,10 +130,6 @@ - - - - diff --git a/src/MSBuildTaskHost/MSBuildTaskHost.csproj b/src/MSBuildTaskHost/MSBuildTaskHost.csproj index 615488880be..0437fdcbb36 100644 --- a/src/MSBuildTaskHost/MSBuildTaskHost.csproj +++ b/src/MSBuildTaskHost/MSBuildTaskHost.csproj @@ -129,15 +129,6 @@ NodeShutdown.cs - - IInternable.cs - - - WeakStringCache.cs - - - OpportunisticIntern.cs - ReadOnlyEmptyCollection.cs @@ -169,7 +160,6 @@ - LogMessagePacket.cs @@ -210,6 +200,9 @@ + + + diff --git a/src/Package/MSBuild.Engine.Corext/MsBuild.Engine.Corext.nuspec b/src/Package/MSBuild.Engine.Corext/MsBuild.Engine.Corext.nuspec index 9482f3faf5d..2918e172a5c 100644 --- a/src/Package/MSBuild.Engine.Corext/MsBuild.Engine.Corext.nuspec +++ 
b/src/Package/MSBuild.Engine.Corext/MsBuild.Engine.Corext.nuspec @@ -43,6 +43,8 @@ + + @@ -97,6 +99,8 @@ + + diff --git a/src/Package/MSBuild.VSSetup/files.swr b/src/Package/MSBuild.VSSetup/files.swr index 2f87ef174f2..79d3960e612 100644 --- a/src/Package/MSBuild.VSSetup/files.swr +++ b/src/Package/MSBuild.VSSetup/files.swr @@ -48,6 +48,8 @@ folder InstallDir:\MSBuild\Current\Bin file source=$(X86BinPath)System.Resources.Extensions.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)System.Runtime.CompilerServices.Unsafe.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)System.Threading.Tasks.Dataflow.dll vs.file.ngenArchitecture=all vs.file.ngenPriority=1 + file source=$(X86BinPath)Microsoft.NET.StringTools.dll vs.file.ngenArchitecture=all + file source=$(TaskHostBinPath)Microsoft.NET.StringTools.net35.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)System.Collections.Immutable.dll vs.file.ngenApplications="[installDir]\MSBuild\Current\Bin\MSBuild.exe" vs.file.ngenArchitecture=all vs.file.ngenPriority=1 file source=$(X86BinPath)Microsoft.Common.CurrentVersion.targets file source=$(X86BinPath)Microsoft.Common.CrossTargeting.targets @@ -198,6 +200,8 @@ folder InstallDir:\MSBuild\Current\Bin\amd64 file source=$(X86BinPath)System.Runtime.CompilerServices.Unsafe.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)System.Threading.Tasks.Dataflow.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)System.Collections.Immutable.dll vs.file.ngenArchitecture=all + file source=$(X86BinPath)Microsoft.NET.StringTools.dll vs.file.ngenArchitecture=all + file source=$(TaskHostBinPath)Microsoft.NET.StringTools.net35.dll vs.file.ngenArchitecture=all file source=$(X86BinPath)Microsoft.Common.CurrentVersion.targets file source=$(X86BinPath)Microsoft.Common.CrossTargeting.targets file source=$(X86BinPath)Microsoft.Common.overridetasks @@ -321,6 +325,7 @@ folder InstallDir:\Common7\IDE\CommonExtensions\MSBuild file 
source=$(SourceDir)Build\Microsoft.Build.pkgdef file source=$(SourceDir)Build\System.Text.Encodings.Web.pkgdef file source=$(SourceDir)Build\System.Text.Json.pkgdef + file source=$(SourceDir)StringTools\StringTools.pkgdef file source=$(SourceDir)Tasks\Microsoft.Build.Tasks.Core.pkgdef file source=$(SourceDir)Tasks\System.Resources.Extensions.pkgdef file source=$(SourceDir)Utilities\Microsoft.Build.Utilities.Core.pkgdef diff --git a/src/Shared/Constants.cs b/src/Shared/Constants.cs index 42f82771737..d8b2c66c98d 100644 --- a/src/Shared/Constants.cs +++ b/src/Shared/Constants.cs @@ -60,8 +60,6 @@ internal static class MSBuildConstants /// internal const string CurrentToolsVersion = "Current"; - // if you change the key also change the following clones - // Microsoft.Build.OpportunisticIntern.BucketedPrioritizedStringList.TryIntern internal const string MSBuildDummyGlobalPropertyHeader = "MSBuildProjectInstance"; /// diff --git a/src/Shared/EscapingUtilities.cs b/src/Shared/EscapingUtilities.cs index a8d86f06c31..2f0082459b3 100644 --- a/src/Shared/EscapingUtilities.cs +++ b/src/Shared/EscapingUtilities.cs @@ -6,6 +6,8 @@ using System.Globalization; using System.Text; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Shared { /// @@ -181,7 +183,7 @@ private static string EscapeWithOptionalCaching(string unescapedString, bool cac return StringBuilderCache.GetStringAndRelease(escapedStringBuilder); } - string escapedString = OpportunisticIntern.StringBuilderToString(escapedStringBuilder); + string escapedString = Strings.WeakIntern(escapedStringBuilder.ToString()); StringBuilderCache.Release(escapedStringBuilder); lock (s_unescapedToEscapedStrings) diff --git a/src/Shared/IInternable.cs b/src/Shared/IInternable.cs deleted file mode 100644 index 9bfa741e097..00000000000 --- a/src/Shared/IInternable.cs +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. 
See LICENSE file in the project root for full license information. - -using System; -using System.Text; -using Microsoft.Build.Shared; - -namespace Microsoft.Build -{ - #region IInternable - /// - /// Define the methods needed to intern something. - /// - internal interface IInternable - { - /// - /// The length of the target. - /// - int Length { get; } - - /// - /// Indexer into the target. Presumed to be fast. - /// - char this[int index] { get; } - - /// - /// Convert target to string. Presumed to be slow (and will be called just once). - /// - string ExpensiveConvertToString(); - - /// - /// Compare target to string. Assumes string is of equal or smaller length than target. - /// - bool StartsWithStringByOrdinalComparison(string other); - - /// - /// Reference compare target to string. If target is non-string this should return false. - /// - bool ReferenceEquals(string other); - } - #endregion - - - #region IInternable Implementations - /// - /// A wrapper over StringBuilder. - /// - internal readonly struct StringBuilderInternTarget : IInternable - { - /// - /// The held StringBuilder - /// - private readonly StringBuilder _target; - - /// - /// Pointless comment about constructor. - /// - internal StringBuilderInternTarget(StringBuilder target) - { - _target = target; - } - - /// - /// The length of the target. - /// - public int Length => _target.Length; - - /// - /// Indexer into the target. Presumed to be fast. - /// - public char this[int index] => _target[index]; - - /// - /// Never reference equals to string. - /// - public bool ReferenceEquals(string other) => false; - - /// - /// Convert target to string. Presumed to be slow (and will be called just once). - /// - public string ExpensiveConvertToString() - { - // PERF NOTE: This will be an allocation hot-spot because the StringBuilder is finally determined to - // not be internable. There is still only one conversion of StringBuilder into string it has just - // moved into this single spot. 
- return _target.ToString(); - } - - /// - /// Compare target to string. Assumes string is of equal or smaller length than target. - /// - public bool StartsWithStringByOrdinalComparison(string other) - { -#if DEBUG - ErrorUtilities.VerifyThrow(other.Length <= _target.Length, "should be at most as long as target"); -#endif - int length = other.Length; - - // Backwards because the end of the string is more likely to be different earlier in the loop. - // For example, C:\project1, C:\project2 - for (int i = length - 1; i >= 0; --i) - { - if (_target[i] != other[i]) - { - return false; - } - } - - return true; - } - - /// - /// Don't use this function. Use ExpensiveConvertToString - /// - public override string ToString() => throw new InvalidOperationException(); - } - - /// - /// A wrapper over char[]. - /// - internal readonly struct CharArrayInternTarget : IInternable - { - /// - /// Start index for the string - /// - private readonly int _startIndex; - - /// - /// The held array - /// - private readonly char[] _target; - - /// - /// Pointless comment about constructor. - /// - internal CharArrayInternTarget(char[] target, int count) - : this(target, 0, count) - { - } - - /// - /// Pointless comment about constructor. - /// - internal CharArrayInternTarget(char[] target, int startIndex, int count) - { -#if DEBUG - if (startIndex + count > target.Length) - { - ErrorUtilities.ThrowInternalError("wrong length"); - } -#endif - _target = target; - _startIndex = startIndex; - Length = count; - } - - /// - /// The length of the target. - /// - public int Length { get; } - - /// - /// Indexer into the target. Presumed to be fast. - /// - public char this[int index] - { - get - { - return _target[index + _startIndex]; - } - } - - /// - /// Convert target to string. Presumed to be slow (and will be called just once). - /// - public bool ReferenceEquals(string other) - { - return false; - } - - /// - /// Convert target to string. 
Presumed to be slow (and will be called just once). - /// - public string ExpensiveConvertToString() - { - // PERF NOTE: This will be an allocation hot-spot because the char[] is finally determined to - // not be internable. There is still only one conversion of char[] into string it has just - // moved into this single spot. - return new string(_target, _startIndex, Length); - } - - /// - /// Compare target to string. Assumes string is of equal or smaller length than target. - /// - public bool StartsWithStringByOrdinalComparison(string other) - { -#if DEBUG - ErrorUtilities.VerifyThrow(other.Length <= Length, "should be at most as long as target"); -#endif - // Backwards because the end of the string is (by observation of Australian Government build) more likely to be different earlier in the loop. - // For example, C:\project1, C:\project2 - for (int i = other.Length - 1; i >= 0; --i) - { - if (_target[i + _startIndex] != other[i]) - { - return false; - } - } - - return true; - } - - /// - /// Don't use this function. Use ExpensiveConvertToString - /// - public override string ToString() - { - throw new InvalidOperationException(); - } - } - - /// - /// Wrapper over a string. - /// - internal readonly struct StringInternTarget : IInternable - { - /// - /// Stores the wrapped string. - /// - private readonly string _target; - - /// - /// Constructor of the class - /// - /// The string to wrap - internal StringInternTarget(string target) - { - ErrorUtilities.VerifyThrowArgumentLength(target, nameof(target)); - _target = target; - } - - /// - /// Gets the length of the target string. - /// - public int Length => _target.Length; - - /// - /// Gets the n character in the target string. - /// - /// Index of the character to gather. - /// The character in the position marked by index. - public char this[int index] => _target[index]; - - /// - /// Returns the target which is already a string. - /// - /// The target string. 
- public string ExpensiveConvertToString() => _target; - - /// - /// Compare target to string. Assumes string is of equal or smaller length than target. - /// - /// The string to compare with the target. - /// True if target starts with , false otherwise. - public bool StartsWithStringByOrdinalComparison(string other) => _target.StartsWith(other, StringComparison.Ordinal); - - /// - /// Verifies if the reference of the target string is the same of the given string. - /// - /// The string reference to compare to. - /// True if both references are equal, false otherwise. - public bool ReferenceEquals(string other) => ReferenceEquals(_target, other); - } - - /// - /// Wrapper over a substring of a string. - /// - internal readonly struct SubstringInternTarget : IInternable - { - /// - /// Stores the wrapped string. - /// - private readonly string _target; - - /// - /// Start index of the substring within the wrapped string. - /// - private readonly int _startIndex; - - /// - /// Constructor of the class - /// - /// The string to wrap. - /// Start index of the substring within . - /// Length of the substring. - internal SubstringInternTarget(string target, int startIndex, int length) - { -#if DEBUG - if (startIndex + length > target.Length) - { - ErrorUtilities.ThrowInternalError("wrong length"); - } -#endif - _target = target; - _startIndex = startIndex; - Length = length; - } - - /// - /// Gets the length of the target substring. - /// - public int Length { get; } - - /// - /// Gets the n character in the target substring. - /// - /// Index of the character to gather. - /// The character in the position marked by index. - public char this[int index] => _target[index + _startIndex]; - - /// - /// Returns the target substring as a string. - /// - /// The substring. - public string ExpensiveConvertToString() => _target.Substring(_startIndex, Length); - - /// - /// Compare target substring to a string. 
Assumes string is of equal or smaller length than the target substring. - /// - /// The string to compare with the target substring. - /// True if target substring starts with , false otherwise. - public bool StartsWithStringByOrdinalComparison(string other) => String.CompareOrdinal(_target, _startIndex, other, 0, other.Length) == 0; - - /// - /// Never reference equals to string. - /// - public bool ReferenceEquals(string other) => false; - } - - #endregion -} diff --git a/src/Shared/InterningBinaryReader.cs b/src/Shared/InterningBinaryReader.cs index 8eba40139be..40589a0becb 100644 --- a/src/Shared/InterningBinaryReader.cs +++ b/src/Shared/InterningBinaryReader.cs @@ -8,6 +8,8 @@ using ErrorUtilities = Microsoft.Build.Shared.ErrorUtilities; +using Microsoft.NET.StringTools; + namespace Microsoft.Build { /// @@ -38,7 +40,7 @@ internal class InterningBinaryReader : BinaryReader /// Comment about constructing. /// private InterningBinaryReader(Stream input, Buffer buffer) - : base(input, buffer.Encoding) + : base(input, Encoding.UTF8) { if (input == null) { @@ -46,7 +48,7 @@ private InterningBinaryReader(Stream input, Buffer buffer) } _buffer = buffer; - _decoder = buffer.Encoding.GetDecoder(); + _decoder = Encoding.UTF8.GetDecoder(); } /// @@ -63,7 +65,7 @@ override public String ReadString() int n = 0; int stringLength; int readLength; - int charsRead; + int charsRead = 0; // Length of the string in bytes, not chars stringLength = Read7BitEncodedInt(); @@ -78,8 +80,7 @@ override public String ReadString() } char[] charBuffer = _buffer.CharBuffer; - - StringBuilder sb = null; + char[] resultBuffer = null; do { readLength = ((stringLength - currPos) > MaxCharsBuffer) ? MaxCharsBuffer : (stringLength - currPos); @@ -104,6 +105,8 @@ override public String ReadString() { ErrorUtilities.ThrowInternalError("From calculating based on the memorystream, about to read n = {0}. 
length = {1}, rawPosition = {2}, readLength = {3}, stringLength = {4}, currPos = {5}.", n, length, rawPosition, readLength, stringLength, currPos); } + + memoryStream.Seek(n, SeekOrigin.Current); } if (rawBuffer == null) @@ -124,26 +127,20 @@ override public String ReadString() throw new EndOfStreamException(); } - charsRead = _decoder.GetChars(rawBuffer, rawPosition, n, charBuffer, 0); - - memoryStream?.Seek(readLength, SeekOrigin.Current); - if (currPos == 0 && n == stringLength) { - return OpportunisticIntern.CharArrayToString(charBuffer, charsRead); + charsRead = _decoder.GetChars(rawBuffer, rawPosition, n, charBuffer, 0); + return Strings.WeakIntern(charBuffer.AsSpan(0, charsRead)); } - if (sb == null) - { - sb = new StringBuilder(stringLength); // Actual string length in chars may be smaller. - } + resultBuffer ??= new char[stringLength]; // Actual string length in chars may be smaller. + charsRead += _decoder.GetChars(rawBuffer, rawPosition, n, resultBuffer, charsRead); - sb.Append(charBuffer, 0, charsRead); currPos += n; } while (currPos < stringLength); - return OpportunisticIntern.StringBuilderToString(sb); + return Strings.WeakIntern(resultBuffer.AsSpan(0, charsRead)); } catch (Exception e) { @@ -186,9 +183,8 @@ private class Buffer : SharedReadBuffer /// internal Buffer() { - this.Encoding = new UTF8Encoding(); this.CharBuffer = new char[MaxCharsBuffer]; - this.ByteBuffer = new byte[Encoding.GetMaxByteCount(MaxCharsBuffer)]; + this.ByteBuffer = new byte[Encoding.UTF8.GetMaxByteCount(MaxCharsBuffer)]; } /// @@ -208,15 +204,6 @@ internal byte[] ByteBuffer get; private set; } - - /// - /// The encoding. - /// - internal UTF8Encoding Encoding - { - get; - private set; - } } } diff --git a/src/Shared/OpportunisticIntern.cs b/src/Shared/OpportunisticIntern.cs deleted file mode 100644 index 48d5d407c7c..00000000000 --- a/src/Shared/OpportunisticIntern.cs +++ /dev/null @@ -1,1093 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
-// Licensed under the MIT license. See LICENSE file in the project root for full license information. - -using System; -#if !CLR2COMPATIBILITY -using System.Collections.Concurrent; -#endif -using System.Text; -using System.Linq; -using System.Collections.Generic; -using System.Diagnostics; -using System.Globalization; -using Microsoft.Build.Shared; -using Microsoft.Build.Utilities; - -namespace Microsoft.Build -{ - /// - /// This class is used to selectively intern strings. It should be used at the point of new string creation. - /// For example, - /// - /// string interned = OpportunisticIntern.Intern(String.Join(",",someStrings)); - /// - /// There are currently two underlying implementations. The new default one in WeakStringCacheInterner is based on weak GC handles. - /// The legacy one in BucketedPrioritizedStringList is available only as an escape hatch by setting an environment variable. - /// - /// The legacy implementation uses heuristics to decide whether it will be efficient to intern a string or not. There is no - /// guarantee that a string will intern. - /// - /// The thresholds and sizes were determined by experimentation to give the best number of bytes saved - /// at reasonable elapsed time cost. - /// - /// The new implementation interns all strings but maintains only weak references so it doesn't keep the strings alive. - /// - internal sealed class OpportunisticIntern - { - /// - /// Defines the interner interface as we currently implement more than one. - /// - private interface IInternerImplementation - { - /// - /// Converts the given internable candidate to its string representation. Efficient implementions have side-effects - /// of caching the results to end up with as few duplicates on the managed heap as practical. - /// - string InterningToString(T candidate) where T : IInternable; - - /// - /// Prints implementation specific interning statistics to the console. - /// - /// A string identifying the interner in the output. 
- void ReportStatistics(string heading); - } - - /// - /// The singleton instance of OpportunisticIntern. - /// - private static OpportunisticIntern _instance = new OpportunisticIntern(); - internal static OpportunisticIntern Instance => _instance; - - private readonly bool _useLegacyInterner = Traits.Instance.UseLegacyStringInterner; - private readonly bool _useSimpleConcurrency = Traits.Instance.UseSimpleInternConcurrency; - - /// - /// The size of the small mru list. - /// - private readonly int _smallMruSize; - - /// - /// The size of the large mru list. - /// - private readonly int _largeMruSize; - - /// - /// The size of the huge mru list. - /// - private readonly int _hugeMruSize; - - /// - /// The smallest size a string can be to be considered small. - /// - private readonly int _smallMruThreshold; - - /// - /// The smallest size a string can be to be considered large. - /// - private readonly int _largeMruThreshold; - - /// - /// The smallest size a string can be to be considered huge. - /// - private readonly int _hugeMruThreshold; - - /// - /// The smallest size a string can be to be ginormous. - /// 8K for large object heap. - /// - private readonly int _ginormousThreshold; - - /// - /// The interner implementation in use. - /// - private IInternerImplementation _interner; - - #region Statistics - /// - /// What if Mru lists were infinitely long? - /// - private BucketedPrioritizedStringList _whatIfInfinite; - - /// - /// What if we doubled the size of the Mru lists? - /// - private BucketedPrioritizedStringList _whatIfDoubled; - - /// - /// What if we halved the size of the Mru lists? - /// - private BucketedPrioritizedStringList _whatIfHalved; - - /// - /// What if the size of Mru lists was zero? 
(We still intern tiny strings in this case) - /// - private BucketedPrioritizedStringList _whatIfZero; - #endregion - - private OpportunisticIntern() - { - _smallMruSize = AssignViaEnvironment("MSBUILDSMALLINTERNSIZE", 50); - _largeMruSize = AssignViaEnvironment("MSBUILDLARGEINTERNSIZE", 100); - _hugeMruSize = AssignViaEnvironment("MSBUILDHUGEINTERNSIZE", 100); - _smallMruThreshold = AssignViaEnvironment("MSBUILDSMALLINTERNTHRESHOLD", 50); - _largeMruThreshold = AssignViaEnvironment("MSBUILDLARGEINTERNTHRESHOLD", 70); - _hugeMruThreshold = AssignViaEnvironment("MSBUILDHUGEINTERNTHRESHOLD", 200); - _ginormousThreshold = AssignViaEnvironment("MSBUILDGINORMOUSINTERNTHRESHOLD", 8000); - - _interner = _useLegacyInterner - ? (IInternerImplementation)new BucketedPrioritizedStringList(gatherStatistics: false, _smallMruSize, _largeMruSize, _hugeMruSize, - _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency) - : (IInternerImplementation)new WeakStringCacheInterner(gatherStatistics: false); - } - - /// - /// Recreates the singleton instance based on the current environment (test only). - /// - internal static void ResetForTests() - { - Debug.Assert(BuildEnvironmentHelper.Instance.RunningTests); - _instance = new OpportunisticIntern(); - } - - /// - /// Assign an int from an environment variable. If its not present, use the default. - /// - private int AssignViaEnvironment(string env, int @default) - { - string threshold = Environment.GetEnvironmentVariable(env); - if (!string.IsNullOrEmpty(threshold)) - { - if (int.TryParse(threshold, out int result)) - { - return result; - } - } - - return @default; - } - - /// - /// Turn on statistics gathering. - /// - internal void EnableStatisticsGathering() - { - if (_useLegacyInterner) - { - // Statistics include several 'what if' scenarios such as doubling the size of the MRU lists. 
- _interner = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize, _largeMruSize, _hugeMruSize, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency); - _whatIfInfinite = new BucketedPrioritizedStringList(gatherStatistics: true, int.MaxValue, int.MaxValue, int.MaxValue, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency); - _whatIfDoubled = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize * 2, _largeMruSize * 2, _hugeMruSize * 2, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency); - _whatIfHalved = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize / 2, _largeMruSize / 2, _hugeMruSize / 2, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency); - _whatIfZero = new BucketedPrioritizedStringList(gatherStatistics: true, 0, 0, 0, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency); - } - else - { - _interner = new WeakStringCacheInterner(gatherStatistics: true); - } - } - - /// - /// Intern the given internable. - /// - internal static string InternableToString(T candidate) where T : IInternable - { - return Instance.InternableToStringImpl(candidate); - } - - /// - /// Potentially Intern the given string builder. - /// - internal static string StringBuilderToString(StringBuilder candidate) - { - return Instance.InternableToStringImpl(new StringBuilderInternTarget(candidate)); - } - - /// - /// Potentially Intern the given char array. - /// - internal static string CharArrayToString(char[] candidate, int count) - { - return Instance.InternableToStringImpl(new CharArrayInternTarget(candidate, count)); - } - - /// - /// Potentially Intern the given char array. 
- /// - internal static string CharArrayToString(char[] candidate, int startIndex, int count) - { - return Instance.InternableToStringImpl(new CharArrayInternTarget(candidate, startIndex, count)); - } - - /// - /// Potentially Intern the given string. - /// - /// The string to intern. - /// The interned string, or the same string if it could not be interned. - internal static string InternStringIfPossible(string candidate) - { - return Instance.InternableToStringImpl(new StringInternTarget(candidate)); - } - - /// - /// Intern the given internable. - /// - private string InternableToStringImpl(T candidate) where T : IInternable - { - if (candidate.Length == 0) - { - // As in the case that a property or itemlist has evaluated to empty. - return string.Empty; - } - - if (_whatIfInfinite != null) - { - _whatIfInfinite.InterningToString(candidate); - _whatIfDoubled.InterningToString(candidate); - _whatIfHalved.InterningToString(candidate); - _whatIfZero.InterningToString(candidate); - } - - string result = _interner.InterningToString(candidate); -#if DEBUG - string expected = candidate.ExpensiveConvertToString(); - if (!String.Equals(result, expected)) - { - ErrorUtilities.ThrowInternalError("Interned string {0} should have been {1}", result, expected); - } -#endif - return result; - } - - /// - /// Report statistics about interning. Don't call unless GatherStatistics has been called beforehand. - /// - internal void ReportStatistics() - { - _interner.ReportStatistics("Main"); - if (_useLegacyInterner) - { - _whatIfInfinite.ReportStatistics("if Infinite"); - _whatIfDoubled.ReportStatistics("if Doubled"); - _whatIfHalved.ReportStatistics("if Halved"); - _whatIfZero.ReportStatistics("if Zero"); - Console.WriteLine(" * Even for MRU size of zero there will still be some intern hits because of the tiny "); - Console.WriteLine(" string matching (eg. 
'true')"); - } - } - - private static bool TryInternHardcodedString(T candidate, string str, ref string interned) where T : IInternable - { - Debug.Assert(candidate.Length == str.Length); - - if (candidate.StartsWithStringByOrdinalComparison(str)) - { - interned = str; - return true; - } - return false; - } - - /// - /// Try to match the candidate with small number of hardcoded interned string literals. - /// The return value indicates how the string was interned (if at all). - /// - /// - /// True if the candidate matched a hardcoded literal, null if it matched a "do not intern" string, false otherwise. - /// - private static bool? TryMatchHardcodedStrings(T candidate, out string interned) where T : IInternable - { - int length = candidate.Length; - interned = null; - - // Each of the hard-coded small strings below showed up in a profile run with considerable duplication in memory. - if (length == 2) - { - if (candidate[1] == '#') - { - if (candidate[0] == 'C') - { - interned = "C#"; - return true; - } - - if (candidate[0] == 'F') - { - interned = "F#"; - return true; - } - } - - if (candidate[0] == 'V' && candidate[1] == 'B') - { - interned = "VB"; - return true; - } - } - else if (length == 4) - { - if (TryInternHardcodedString(candidate, "TRUE", ref interned) || - TryInternHardcodedString(candidate, "True", ref interned) || - TryInternHardcodedString(candidate, "Copy", ref interned) || - TryInternHardcodedString(candidate, "true", ref interned) || - TryInternHardcodedString(candidate, "v4.0", ref interned)) - { - return true; - } - } - else if (length == 5) - { - if (TryInternHardcodedString(candidate, "FALSE", ref interned) || - TryInternHardcodedString(candidate, "false", ref interned) || - TryInternHardcodedString(candidate, "Debug", ref interned) || - TryInternHardcodedString(candidate, "Build", ref interned) || - TryInternHardcodedString(candidate, "Win32", ref interned)) - { - return true; - } - } - else if (length == 6) - { - if 
(TryInternHardcodedString(candidate, "''!=''", ref interned) || - TryInternHardcodedString(candidate, "AnyCPU", ref interned)) - { - return true; - } - } - else if (length == 7) - { - if (TryInternHardcodedString(candidate, "Library", ref interned) || - TryInternHardcodedString(candidate, "MSBuild", ref interned) || - TryInternHardcodedString(candidate, "Release", ref interned)) - { - return true; - } - } - // see Microsoft.Build.BackEnd.BuildRequestConfiguration.CreateUniqueGlobalProperty - else if (length > MSBuildConstants.MSBuildDummyGlobalPropertyHeader.Length && - candidate.StartsWithStringByOrdinalComparison(MSBuildConstants.MSBuildDummyGlobalPropertyHeader)) - { - // don't want to leak unique strings into the cache - interned = candidate.ExpensiveConvertToString(); - return null; - } - else if (length == 24) - { - if (TryInternHardcodedString(candidate, "ResolveAssemblyReference", ref interned)) - { - return true; - } - } - return false; - } - - /// - /// Implements interning based on a WeakStringCache (new implementation). - /// - private class WeakStringCacheInterner : IInternerImplementation - { - /// - /// Enumerates the possible interning results. - /// - private enum InternResult - { - MatchedHardcodedString, - FoundInWeakStringCache, - AddedToWeakStringCache, - RejectedFromInterning - } - - /// - /// The cache to keep strings in. - /// - private readonly WeakStringCache _weakStringCache = new WeakStringCache(); - -#region Statistics - /// - /// Whether or not to gather statistics. - /// - private readonly bool _gatherStatistics; - - /// - /// Number of times interning with hardcoded string literals worked. - /// - private int _hardcodedInternHits; - - /// - /// Number of times the regular interning path found the string in the cache. - /// - private int _regularInternHits; - - /// - /// Number of times the regular interning path added the string to the cache. 
- /// - private int _regularInternMisses; - - /// - /// Number of times interning wasn't attempted. - /// - private int _rejectedStrings; - - /// - /// Total number of strings eliminated by interning. - /// - private int _internEliminatedStrings; - - /// - /// Total number of chars eliminated across all strings. - /// - private int _internEliminatedChars; - - /// - /// Maps strings that went though the regular (i.e. not hardcoded) interning path to the number of times they have been - /// seen. The higher the number the better the payoff if the string had been hardcoded. - /// - private Dictionary _missedHardcodedStrings; - -#endregion - - public WeakStringCacheInterner(bool gatherStatistics) - { - if (gatherStatistics) - { - _missedHardcodedStrings = new Dictionary(); - } - _gatherStatistics = gatherStatistics; - } - - /// - /// Intern the given internable. - /// - public string InterningToString(T candidate) where T : IInternable - { - if (_gatherStatistics) - { - return InternWithStatistics(candidate); - } - else - { - TryIntern(candidate, out string result); - return result; - } - } - - /// - /// Report statistics to the console. 
- /// - public void ReportStatistics(string heading) - { - string title = "Opportunistic Intern (" + heading + ")"; - Console.WriteLine("\n{0}{1}{0}", new string('=', 41 - (title.Length / 2)), title); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Hardcoded Hits", _hardcodedInternHits, "hits"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Hardcoded Rejects", _rejectedStrings, "rejects"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Hits", _regularInternHits, "hits"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Misses", _regularInternMisses, "misses"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Strings*", _internEliminatedStrings, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Chars", _internEliminatedChars, "chars"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Estimated Eliminated Bytes", _internEliminatedChars * 2, "bytes"); - Console.WriteLine("Elimination assumes that strings provided were unique objects."); - Console.WriteLine("|---------------------------------------------------------------------------------|"); - - IEnumerable topMissingHardcodedString = - _missedHardcodedStrings - .OrderByDescending(kv => kv.Value * kv.Key.Length) - .Take(15) - .Where(kv => kv.Value > 1) - .Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars)\n{0}", kv.Key, kv.Value, kv.Key.Length)); - - Console.WriteLine("##########Top Missing Hardcoded Strings: \n{0} ", string.Join("\n==============\n", topMissingHardcodedString.ToArray())); - Console.WriteLine(); - - WeakStringCache.DebugInfo debugInfo = _weakStringCache.GetDebugInfo(); - Console.WriteLine("WeakStringCache statistics:"); - Console.WriteLine("String count live/collected/total = {0}/{1}/{2}", debugInfo.LiveStringCount, debugInfo.CollectedStringCount, debugInfo.LiveStringCount + debugInfo.CollectedStringCount); - } - - /// - /// Try to intern the string. 
- /// The return value indicates the how the string was interned (if at all). - /// - private InternResult TryIntern(T candidate, out string interned) where T : IInternable - { - // First, try the hard coded intern strings. - bool? hardcodedMatchResult = TryMatchHardcodedStrings(candidate, out interned); - if (hardcodedMatchResult != false) - { - // Either matched a hardcoded string or is explicitly not to be interned. - return hardcodedMatchResult.HasValue ? InternResult.MatchedHardcodedString : InternResult.RejectedFromInterning; - } - - interned = _weakStringCache.GetOrCreateEntry(candidate, out bool cacheHit); - return cacheHit ? InternResult.FoundInWeakStringCache : InternResult.AddedToWeakStringCache; - } - - /// - /// Version of Intern that gathers statistics - /// - private string InternWithStatistics(T candidate) where T : IInternable - { - lock (_missedHardcodedStrings) - { - InternResult internResult = TryIntern(candidate, out string result); - - switch (internResult) - { - case InternResult.MatchedHardcodedString: - _hardcodedInternHits++; - break; - case InternResult.FoundInWeakStringCache: - _regularInternHits++; - break; - case InternResult.AddedToWeakStringCache: - _regularInternMisses++; - break; - case InternResult.RejectedFromInterning: - _rejectedStrings++; - break; - } - - if (internResult != InternResult.MatchedHardcodedString && internResult != InternResult.RejectedFromInterning) - { - _missedHardcodedStrings.TryGetValue(result, out int priorCount); - _missedHardcodedStrings[result] = priorCount + 1; - } - - if (!candidate.ReferenceEquals(result)) - { - // Reference changed so 'candidate' is now released and should save memory. - _internEliminatedStrings++; - _internEliminatedChars += candidate.Length; - } - - return result; - } - } - } - - /// - /// Manages a set of mru lists that hold strings in varying size ranges (legacy implementation). 
- /// - private class BucketedPrioritizedStringList : IInternerImplementation - { - /// - /// The small string Mru list. - /// - private readonly PrioritizedStringList _smallMru; - - /// - /// The large string Mru list. - /// - private readonly PrioritizedStringList _largeMru; - - /// - /// The huge string Mru list. - /// - private readonly PrioritizedStringList _hugeMru; - - /// - /// Three most recently used strings over 8K. - /// - private readonly LinkedList _ginormous = new LinkedList(); - - /// - /// The smallest size a string can be to be considered small. - /// - private readonly int _smallMruThreshold; - - /// - /// The smallest size a string can be to be considered large. - /// - private readonly int _largeMruThreshold; - - /// - /// The smallest size a string can be to be considered huge. - /// - private readonly int _hugeMruThreshold; - - /// - /// The smallest size a string can be to be ginormous. - /// - private readonly int _ginormousThreshold; - - private readonly bool _useSimpleConcurrency; - -#if !CLR2COMPATIBILITY - // ConcurrentDictionary starts with capacity 31 but we're usually adding far more than that. Make a better first capacity guess to reduce - // ConcurrentDictionary having to take all internal locks to upgrade its bucket list. Note that the number should be prime per the - // comments on the code at https://referencesource.microsoft.com/#mscorlib/system/Collections/Concurrent/ConcurrentDictionary.cs,122 - // Also note default lock count is NativeMethodsShared.GetLogicalCoreCount() from the same code. - private const int InitialCapacity = 2053; - private readonly ConcurrentDictionary _internedStrings = new ConcurrentDictionary(NativeMethodsShared.GetLogicalCoreCount(), InitialCapacity, StringComparer.Ordinal); -#endif - -#region Statistics - /// - /// Whether or not to gather statistics - /// - private readonly bool _gatherStatistics; - - /// - /// Number of times interning worked. 
- /// - private int _internHits; - - /// - /// Number of times interning didn't work. - /// - private int _internMisses; - - /// - /// Number of times interning wasn't attempted. - /// - private int _internRejects; - - /// - /// Total number of strings eliminated by interning. - /// - private int _internEliminatedStrings; - - /// - /// Total number of chars eliminated across all strings. - /// - private int _internEliminatedChars; - - /// - /// Number of times the ginourmous string hit. - /// - private int _ginormousHits; - - /// - /// Number of times the ginourmous string missed. - /// - private int _ginormousMisses; - - /// - /// Chars interned for ginormous range. - /// - private int _ginormousCharsSaved; - - /// - /// Whether or not to track ginormous strings. - /// - private readonly bool _dontTrack; - - /// - /// The time spent interning. - /// - private readonly Stopwatch _stopwatch; - - /// - /// Strings which did not intern - /// - private readonly Dictionary _missedStrings; - - /// - /// Strings which we didn't attempt to intern - /// - private readonly Dictionary _rejectedStrings; - - /// - /// Number of ginormous strings to keep - /// By observation of Auto7, there are about three variations of the huge solution config blob - /// There aren't really any other strings of this size, but make it 10 to be sure. (There will barely be any misses) - /// - private const int GinormousSize = 10; - -#endregion - - /// - /// Construct. 
- /// - internal BucketedPrioritizedStringList(bool gatherStatistics, int smallMruSize, int largeMruSize, int hugeMruSize, int smallMruThreshold, int largeMruThreshold, int hugeMruThreshold, int ginormousThreshold, bool useSimpleConcurrency) - { - if (smallMruSize == 0 && largeMruSize == 0 && hugeMruSize == 0) - { - _dontTrack = true; - } - - _smallMru = new PrioritizedStringList(smallMruSize); - _largeMru = new PrioritizedStringList(largeMruSize); - _hugeMru = new PrioritizedStringList(hugeMruSize); - _smallMruThreshold = smallMruThreshold; - _largeMruThreshold = largeMruThreshold; - _hugeMruThreshold = hugeMruThreshold; - _ginormousThreshold = ginormousThreshold; - _useSimpleConcurrency = useSimpleConcurrency; - - for (int i = 0; i < GinormousSize; i++) - { - _ginormous.AddFirst(new WeakReference(string.Empty)); - } - - _gatherStatistics = gatherStatistics; - if (gatherStatistics) - { - _stopwatch = new Stopwatch(); - _missedStrings = new Dictionary(StringComparer.Ordinal); - _rejectedStrings = new Dictionary(StringComparer.Ordinal); - } - } - - /// - /// Intern the given internable. - /// - public string InterningToString(T candidate) where T : IInternable - { - if (_gatherStatistics) - { - return InternWithStatistics(candidate); - } - else - { - TryIntern(candidate, out string result); - return result; - } - } - - /// - /// Report statistics to the console. 
- /// - public void ReportStatistics(string heading) - { - string title = "Opportunistic Intern (" + heading + ")"; - Console.WriteLine("\n{0}{1}{0}", new string('=', 41 - (title.Length / 2)), title); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Hits", _internHits, "hits"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Misses", _internMisses, "misses"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Rejects (as shorter than " + _smallMruThreshold + " bytes)", _internRejects, "rejects"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Strings*", _internEliminatedStrings, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Chars", _internEliminatedChars, "chars"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Estimated Eliminated Bytes", _internEliminatedChars * 2, "bytes"); - Console.WriteLine("Elimination assumes that strings provided were unique objects."); - Console.WriteLine("|---------------------------------------------------------------------------------|"); - KeyValuePair held = _smallMru.Statistics(); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Strings MRU Size", Instance._smallMruSize, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Strings (>=" + _smallMruThreshold + " chars) Held", held.Key, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Estimated Bytes Held", held.Value * 2, "bytes"); - Console.WriteLine("|---------------------------------------------------------------------------------|"); - held = _largeMru.Statistics(); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Strings MRU Size", Instance._largeMruSize, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Strings (>=" + _largeMruThreshold + " chars) Held", held.Key, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Estimated Bytes Held", held.Value * 2, "bytes"); - 
Console.WriteLine("|---------------------------------------------------------------------------------|"); - held = _hugeMru.Statistics(); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Strings MRU Size", Instance._hugeMruSize, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Strings (>=" + _hugeMruThreshold + " chars) Held", held.Key, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Estimated Bytes Held", held.Value * 2, "bytes"); - Console.WriteLine("|---------------------------------------------------------------------------------|"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Strings MRU Size", GinormousSize, "strings"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous (>=" + _ginormousThreshold + " chars) Hits", _ginormousHits, "hits"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Misses", _ginormousMisses, "misses"); - Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Chars Saved", _ginormousCharsSaved, "chars"); - Console.WriteLine("|---------------------------------------------------------------------------------|"); - - // There's no point in reporting the ginormous string because it will have evaporated by now. 
- Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Time Spent Interning", _stopwatch.ElapsedMilliseconds, "ms"); - Console.WriteLine("{0}{0}", new string('=', 41)); - - IEnumerable topMissingString = - _missedStrings - .OrderByDescending(kv => kv.Value * kv.Key.Length) - .Take(15) - .Where(kv => kv.Value > 1) - .Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars = {3}KB wasted)\n{0}", kv.Key, kv.Value, kv.Key.Length, (kv.Value - 1) * kv.Key.Length * 2 / 1024)); - - Console.WriteLine("##########Top Missed Strings: \n{0} ", string.Join("\n==============\n", topMissingString.ToArray())); - Console.WriteLine(); - - IEnumerable topRejectedString = - _rejectedStrings - .OrderByDescending(kv => kv.Value * kv.Key.Length) - .Take(15) - .Where(kv => kv.Value > 1) - .Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars = {3}KB wasted)\n{0}", kv.Key, kv.Value, kv.Key.Length, (kv.Value - 1) * kv.Key.Length * 2 / 1024)); - - Console.WriteLine("##########Top Rejected Strings: \n{0} ", string.Join("\n==============\n", topRejectedString.ToArray())); - } - - /// - /// Try to intern the string. - /// Return true if an interned value could be returned. - /// Return false if it was added to the intern list, but wasn't there already. - /// Return null if it didn't meet the length criteria for any of the buckets. Interning was rejected - /// - private bool? TryIntern(T candidate, out string interned) where T : IInternable - { - int length = candidate.Length; - interned = null; - - // First, try the hard coded intern strings. - // Each of the hard-coded small strings below showed up in a profile run with considerable duplication in memory. - if (!_dontTrack) - { - bool? hardcodedMatchResult = TryMatchHardcodedStrings(candidate, out interned); - if (hardcodedMatchResult != false) - { - // Either matched a hardcoded string or is explicitly not to be interned. 
- return hardcodedMatchResult; - } - - if (length > _ginormousThreshold) - { - lock (_ginormous) - { - LinkedListNode current = _ginormous.First; - - while (current != null) - { - if (current.Value.Target is string last && last.Length == candidate.Length && candidate.StartsWithStringByOrdinalComparison(last)) - { - interned = last; - _ginormousHits++; - _ginormousCharsSaved += last.Length; - - _ginormous.Remove(current); - _ginormous.AddFirst(current); - - return true; - } - - current = current.Next; - } - - _ginormousMisses++; - interned = candidate.ExpensiveConvertToString(); - - LinkedListNode lastNode = _ginormous.Last; - _ginormous.RemoveLast(); - _ginormous.AddFirst(lastNode); - lastNode.Value.Target = interned; - - return false; - } - } -#if !CLR2COMPATIBILITY - else if (_useSimpleConcurrency) - { - var stringified = candidate.ExpensiveConvertToString(); - interned = _internedStrings.GetOrAdd(stringified, stringified); - return true; - } -#endif - else if (length >= _hugeMruThreshold) - { - lock (_hugeMru) - { - return _hugeMru.TryGet(candidate, out interned); - } - } - else if (length >= _largeMruThreshold) - { - lock (_largeMru) - { - return _largeMru.TryGet(candidate, out interned); - } - } - else if (length >= _smallMruThreshold) - { - lock (_smallMru) - { - return _smallMru.TryGet(candidate, out interned); - } - } - } - - interned = candidate.ExpensiveConvertToString(); - return null; - } - - /// - /// Version of Intern that gathers statistics - /// - private string InternWithStatistics(T candidate) where T : IInternable - { - lock (_missedStrings) - { - _stopwatch.Start(); - bool? interned = TryIntern(candidate, out string result); - _stopwatch.Stop(); - - if (interned.HasValue && !interned.Value) - { - // Could not intern. 
- _internMisses++; - - _missedStrings.TryGetValue(result, out int priorCount); - _missedStrings[result] = priorCount + 1; - - return result; - } - else if (interned == null) - { - // Decided not to attempt interning - _internRejects++; - - _rejectedStrings.TryGetValue(result, out int priorCount); - _rejectedStrings[result] = priorCount + 1; - - return result; - } - - _internHits++; - if (!candidate.ReferenceEquals(result)) - { - // Reference changed so 'candidate' is now released and should save memory. - _internEliminatedStrings++; - _internEliminatedChars += candidate.Length; - } - - return result; - } - } - - /// - /// A singly linked list of strings where the most recently accessed string is at the top. - /// Size expands up to a fixed number of strings. - /// - private class PrioritizedStringList - { - /// - /// Maximum size of the mru list. - /// - private readonly int _size; - - /// - /// Head of the mru list. - /// - private Node _mru; - - /// - /// Construct an Mru list with a fixed maximum size. - /// - internal PrioritizedStringList(int size) - { - _size = size; - } - - /// - /// Try to get one element from the list. Upon leaving the function 'candidate' will be at the head of the Mru list. - /// This function is not thread-safe. - /// - internal bool TryGet(T candidate, out string interned) where T : IInternable - { - if (_size == 0) - { - interned = candidate.ExpensiveConvertToString(); - return false; - } - - int length = candidate.Length; - Node secondPrior = null; - Node prior = null; - Node head = _mru; - bool found = false; - int itemCount = 0; - - while (head != null && !found) - { - if (head.Value.Length == length) - { - if (candidate.StartsWithStringByOrdinalComparison(head.Value)) - { - found = true; - } - } - - if (!found) - { - secondPrior = prior; - prior = head; - head = head.Next; - } - - itemCount++; - } - - if (found) - { - // Move it to the top and return the interned version. 
- if (prior != null) - { - if (!candidate.ReferenceEquals(head.Value)) - { - // Wasn't at the top already, so move it there. - prior.Next = head.Next; - head.Next = _mru; - _mru = head; - interned = _mru.Value; - return true; - } - else - { - // But don't move it up if there is reference equality so that multiple calls to Intern don't redundantly emphasize a string. - interned = head.Value; - return true; - } - } - else - { - // Found the item in the top spot. No need to move anything. - interned = _mru.Value; - return true; - } - } - else - { - // Not found. Create a new entry and place it at the top. - Node old = _mru; - _mru = new Node(candidate.ExpensiveConvertToString()) { Next = old }; - - // Cache miss. Use this opportunity to discard any element over the max size. - if (itemCount >= _size && secondPrior != null) - { - secondPrior.Next = null; - } - - interned = _mru.Value; - return false; - } - } - - /// - /// Returns the number of strings held and the total number of chars held. - /// - internal KeyValuePair Statistics() - { - Node head = _mru; - int chars = 0; - int strings = 0; - while (head != null) - { - chars += head.Value.Length; - strings++; - head = head.Next; - } - - return new KeyValuePair(strings, chars); - } - - /// - /// Singly linked list node. - /// - private class Node - { - /// - /// Construct a Node - /// - internal Node(string value) - { - Value = value; - } - - /// - /// The next node in the list. - /// - internal Node Next { get; set; } - - /// - /// The held string. - /// - internal string Value { get; } - } - } - } - } -} diff --git a/src/Shared/PropertyParser.cs b/src/Shared/PropertyParser.cs index 7ca92675832..e10f51efc8f 100644 --- a/src/Shared/PropertyParser.cs +++ b/src/Shared/PropertyParser.cs @@ -2,10 +2,10 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
using System; -using System.Text; using System.Collections.Generic; using Microsoft.Build.Framework; using Microsoft.Build.Shared; +using Microsoft.NET.StringTools; #if BUILD_ENGINE namespace Microsoft.Build.BackEnd @@ -146,8 +146,7 @@ internal static bool GetTableWithEscaping(TaskLoggingHelper log, string paramete // There was a property definition previous to this one. Append the current string // to that previous value, using semicolon as a separator. string propertyValue = EscapingUtilities.Escape(propertyNameValueString.Trim()); - finalPropertiesList[finalPropertiesList.Count - 1].Value.Append(';'); - finalPropertiesList[finalPropertiesList.Count - 1].Value.Append(propertyValue); + finalPropertiesList[finalPropertiesList.Count - 1].Value.Add(propertyValue); } else { @@ -163,9 +162,22 @@ internal static bool GetTableWithEscaping(TaskLoggingHelper log, string paramete // needs to pass onto the engine. log?.LogMessageFromText(parameterName, MessageImportance.Low); + using SpanBasedStringBuilder stringBuilder = Strings.GetSpanBasedStringBuilder(); foreach (PropertyNameValuePair propertyNameValuePair in finalPropertiesList) { - string propertyValue = OpportunisticIntern.StringBuilderToString(propertyNameValuePair.Value); + stringBuilder.Clear(); + bool needsSemicolon = false; + foreach (string valueFragment in propertyNameValuePair.Value) + { + if (needsSemicolon) + { + stringBuilder.Append(";"); + } + needsSemicolon = true; + stringBuilder.Append(valueFragment); + } + + string propertyValue = stringBuilder.ToString(); finalPropertiesTable[propertyNameValuePair.Name] = propertyValue; log?.LogMessageFromText( $" {propertyNameValuePair.Name}={propertyValue}", @@ -187,14 +199,17 @@ private class PropertyNameValuePair internal string Name { get; } /// - /// Property value + /// Property value fragments. Join with semicolon to get the final value. 
/// - internal StringBuilder Value { get; } + internal List Value { get; } internal PropertyNameValuePair(string propertyName, string propertyValue) { Name = propertyName; - Value = new StringBuilder(propertyValue); + Value = new List + { + propertyValue + }; } } } diff --git a/src/Shared/ReuseableStringBuilder.cs b/src/Shared/ReuseableStringBuilder.cs index fbcaabe7e11..8abf89a0093 100644 --- a/src/Shared/ReuseableStringBuilder.cs +++ b/src/Shared/ReuseableStringBuilder.cs @@ -15,21 +15,15 @@ namespace Microsoft.Build.Shared /// A StringBuilder lookalike that reuses its internal storage. /// /// - /// You can add any properties or methods on the real StringBuilder that are needed. + /// This class is being deprecated in favor of SpanBasedStringBuilder in StringTools. Avoid adding more uses. /// - internal sealed class ReuseableStringBuilder : IDisposable, IInternable + internal sealed class ReuseableStringBuilder : IDisposable { /// /// Captured string builder. /// private StringBuilder _borrowedBuilder; - /// - /// Profiling showed that the hot code path for large string builder calls first IsOrdinalEqualToStringOfSameLength followed by ExpensiveConvertToString - /// when IsOrdinalEqualToStringOfSameLength did return true. We can therefore reduce the costs for large strings by over a factor two. - /// - private string _cachedString; - /// /// Capacity to initialize the builder with. /// @@ -58,71 +52,6 @@ public int Length } } - /// - /// Indexer into the target. Presumed to be fast. - /// - char IInternable.this[int index] - { - get - { - LazyPrepare(); // Must have one to call this - return _borrowedBuilder[index]; - } - } - - /// - /// Convert target to string. Presumed to be slow (and will be called just once). - /// - string IInternable.ExpensiveConvertToString() - { - if( _cachedString == null) - { - _cachedString = ((ReuseableStringBuilder)this).ToString(); - } - return _cachedString; - } - - /// - /// The number here is arbitrary. 
For a StringBuilder we have a chunk length of 8000 characters which corresponds to - /// 5 StringBuilder chunks which need to be walked before the next character can be fetched (see MaxChunkSize of StringBuilder). - /// That should be a good compromise to not allocate to much but still make use of the intern cache. The actual cutoff where it is cheaper - /// to allocate a temp string might be well below that limit but that depends on many other factors such as GC Heap size and other allocating threads. - /// - const int MaxByCharCompareLength = 40 * 1000; - - /// - /// Compare target to string. - /// - bool IInternable.StartsWithStringByOrdinalComparison(string other) - { -#if DEBUG - ErrorUtilities.VerifyThrow(other.Length <= _borrowedBuilder.Length, "should be at most as long as target"); -#endif - if (other.Length > MaxByCharCompareLength) - { - return ((IInternable) this).ExpensiveConvertToString().StartsWith(other, StringComparison.Ordinal); - } - // Backwards because the end of the string is (by observation of Australian Government build) more likely to be different earlier in the loop. - // For example, C:\project1, C:\project2 - for (int i = other.Length - 1; i >= 0; --i) - { - if (_borrowedBuilder[i] != other[i]) - { - return false; - } - } - - return true; - } - - /// - /// Never reference equals to string. - /// - bool IInternable.ReferenceEquals(string other) - { - return false; - } - /// /// Convert to a string. 
/// @@ -144,7 +73,6 @@ void IDisposable.Dispose() if (_borrowedBuilder != null) { ReuseableStringBuilderFactory.Release(_borrowedBuilder); - _cachedString = null; _borrowedBuilder = null; _capacity = -1; } @@ -156,7 +84,6 @@ void IDisposable.Dispose() internal ReuseableStringBuilder Append(char value) { LazyPrepare(); - _cachedString = null; _borrowedBuilder.Append(value); return this; } @@ -167,7 +94,6 @@ internal ReuseableStringBuilder Append(char value) internal ReuseableStringBuilder Append(string value) { LazyPrepare(); - _cachedString = null; _borrowedBuilder.Append(value); return this; } @@ -178,7 +104,6 @@ internal ReuseableStringBuilder Append(string value) internal ReuseableStringBuilder Append(string value, int startIndex, int count) { LazyPrepare(); - _cachedString = null; _borrowedBuilder.Append(value, startIndex, count); return this; } @@ -186,7 +111,6 @@ internal ReuseableStringBuilder Append(string value, int startIndex, int count) public ReuseableStringBuilder AppendSeparated(char separator, ICollection strings) { LazyPrepare(); - _cachedString = null; var separatorsRemaining = strings.Count - 1; @@ -208,7 +132,6 @@ public ReuseableStringBuilder AppendSeparated(char separator, ICollection public readonly bool CacheFileExistence = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("MsBuildCacheFileExistence")); - /// - /// Use the legacy string interning implementation based on MRU lists. - /// - public readonly bool UseLegacyStringInterner = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("MSBuildUseLegacyStringInterner")); - - /// - /// Eliminate locking in OpportunisticIntern at the expense of memory (in effect only if UseLegacyStringInterner is set). 
- /// - public readonly bool UseSimpleInternConcurrency = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("MSBuildUseSimpleInternConcurrency")); - public readonly bool UseSimpleProjectRootElementCacheConcurrency = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("MsBuildUseSimpleProjectRootElementCacheConcurrency")); /// diff --git a/src/Shared/WeakStringCache.Concurrent.cs b/src/Shared/WeakStringCache.Concurrent.cs deleted file mode 100644 index 318aeafc131..00000000000 --- a/src/Shared/WeakStringCache.Concurrent.cs +++ /dev/null @@ -1,137 +0,0 @@ - -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. - -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using Microsoft.Build.Shared; - -namespace Microsoft.Build -{ - /// - /// Implements the WeakStringCache functionality on modern .NET versions where ConcurrentDictionary is available. - /// - internal sealed partial class WeakStringCache : IDisposable - { - private readonly ConcurrentDictionary _stringsByHashCode; - - public WeakStringCache() - { - _stringsByHashCode = new ConcurrentDictionary(NativeMethodsShared.GetLogicalCoreCount(), _initialCapacity); - } - - /// - /// Main entrypoint of this cache. Tries to look up a string that matches the given internable. If it succeeds, returns - /// the string and sets cacheHit to true. If the string is not found, calls ExpensiveConvertToString on the internable, - /// adds the resulting string to the cache, and returns it, setting cacheHit to false. - /// - /// The internable describing the string we're looking for. - /// true if match found in cache, false otherwise. - /// A string matching the given internable. - /// - /// This method performs two operations on the underlying ConcurrentDictionary on both cache hit and cache miss. - /// 1. It checks whether the dictionary has a matching entry. 
The entry is temporarily removed from the cache so it doesn't - /// race with Scavenge() freeing GC handles. This is the first operation. - /// 2a. If there is a matching entry, we extract the string out of it and put it back in the cache (the second operation). - /// 2b. If there is an entry but it doesn't match, or there is no entry for the given hash code, we extract the string from - /// the internable, set it on the entry, and add the entry (back) in the cache. - /// - public string GetOrCreateEntry(T internable, out bool cacheHit) where T : IInternable - { - int hashCode = GetInternableHashCode(internable); - - StringWeakHandle handle; - string result; - bool addingNewHandle = false; - - // Get the existing handle from the cache and assume ownership by removing it. We can't use the simple TryGetValue() here because - // the Scavenge method running on another thread could free the handle from underneath us. - if (_stringsByHashCode.TryRemove(hashCode, out handle)) - { - result = handle.GetString(internable); - if (result != null) - { - // We have a hit, put the handle back in the cache. - if (!_stringsByHashCode.TryAdd(hashCode, handle)) - { - // Another thread has managed to add a handle for the same hash code, so the one we got can be freed. - handle.Free(); - } - cacheHit = true; - return result; - } - } - else - { - handle = new StringWeakHandle(); - addingNewHandle = true; - } - - // We don't have the string in the cache - create it. - result = internable.ExpensiveConvertToString(); - - // Set the handle to reference the new string and put it in the cache. - handle.SetString(result); - if (!_stringsByHashCode.TryAdd(hashCode, handle)) - { - // Another thread has managed to add a handle for the same hash code, so the one we got can be freed. - handle.Free(); - } - - // Remove unused handles if our heuristic indicates that it would be productive. Note that the _scavengeThreshold field - // accesses are not protected from races. 
Being atomic (as guaranteed by the 32-bit data type) is enough here. - if (addingNewHandle) - { - // Prevent the dictionary from growing forever with GC handles that don't reference live strings anymore. - if (_stringsByHashCode.Count >= _scavengeThreshold) - { - // Before we start scavenging set _scavengeThreshold to a high value to effectively lock other threads from - // running Scavenge at the same time (minus rare races). - _scavengeThreshold = int.MaxValue; - try - { - // Get rid of unused handles. - Scavenge(); - } - finally - { - // And do this again when the number of handles reaches double the current after-scavenge number. - _scavengeThreshold = _stringsByHashCode.Count * 2; - } - } - } - - cacheHit = false; - return result; - } - - /// - /// Iterates over the cache and removes unused GC handles, i.e. handles that don't reference live strings. - /// This is expensive so try to call such that the cost is amortized to O(1) per GetOrCreateEntry() invocation. - /// - public void Scavenge() - { - foreach (KeyValuePair entry in _stringsByHashCode) - { - if (!entry.Value.IsUsed && _stringsByHashCode.TryRemove(entry.Key, out StringWeakHandle removedHandle)) - { - // Note that the removed handle may be different from the one we got from the enumerator so check again - // and try to put it back if it's still in use. - if (!removedHandle.IsUsed || !_stringsByHashCode.TryAdd(entry.Key, removedHandle)) - { - removedHandle.Free(); - } - } - } - } - - /// - /// Returns internal debug counters calculated based on the current state of the cache. - /// - public DebugInfo GetDebugInfo() - { - return GetDebugInfoImpl(); - } - } -} diff --git a/src/StringTools.Benchmark/Program.cs b/src/StringTools.Benchmark/Program.cs new file mode 100644 index 00000000000..7bdd21ed36a --- /dev/null +++ b/src/StringTools.Benchmark/Program.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. 
See LICENSE file in the project root for full license information. + +using BenchmarkDotNet.Running; + +namespace Microsoft.NET.StringTools.Benchmark +{ + public class Program + { + public static void Main(string[] args) + { + BenchmarkRunner.Run(); + } + } +} diff --git a/src/StringTools.Benchmark/SpanBasedStringBuilder_Benchmark.cs b/src/StringTools.Benchmark/SpanBasedStringBuilder_Benchmark.cs new file mode 100644 index 00000000000..03fa15ccfc5 --- /dev/null +++ b/src/StringTools.Benchmark/SpanBasedStringBuilder_Benchmark.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using BenchmarkDotNet.Attributes; +using System.Text; + +namespace Microsoft.NET.StringTools.Benchmark +{ + [MemoryDiagnoser] + public class SpanBasedStringBuilder_Benchmark + { + [Params(1, 2, 4, 8, 16, 256)] + public int NumSubstrings { get; set; } + + [Params(1, 8, 32, 128, 512)] + public int SubstringLengths { get; set; } + + private string[] _subStrings; + + private static SpanBasedStringBuilder _pooledSpanBasedStringBuilder = new SpanBasedStringBuilder(); + private static StringBuilder _pooledStringBuilder = new StringBuilder(); + + private static int _uniqueStringCounter = 0; + + [GlobalSetup] + public void GlobalSetup() + { + _subStrings = new string[NumSubstrings]; + for (int i = 0; i < _subStrings.Length; i++) + { + _subStrings[i] = new string('a', SubstringLengths); + } + } + + [Benchmark] + public void SpanBasedOperations_CacheHit() + { + SpanBasedStringBuilder sbsb = _pooledSpanBasedStringBuilder; + sbsb.Clear(); + foreach (string subString in _subStrings) + { + sbsb.Append(subString); + } + sbsb.ToString(); + } + + [Benchmark] + public void RegularOperations_CacheHit() + { + StringBuilder sb = _pooledStringBuilder; + sb.Clear(); + foreach (string subString in _subStrings) + { + sb.Append(subString); + } + Strings.WeakIntern(sb.ToString()); + } + + 
[Benchmark] + public void SpanBasedOperations_CacheMiss() + { + SpanBasedStringBuilder sbsb = _pooledSpanBasedStringBuilder; + sbsb.Clear(); + foreach (string subString in _subStrings) + { + sbsb.Append(subString); + } + sbsb.Append(_uniqueStringCounter++.ToString("X8")); + sbsb.ToString(); + } + + [Benchmark] + public void RegularOperations_CacheMiss() + { + StringBuilder sb = _pooledStringBuilder; + sb.Clear(); + foreach (string subString in _subStrings) + { + sb.Append(subString); + } + sb.Append(_uniqueStringCounter++.ToString("X8")); + Strings.WeakIntern(sb.ToString()); + } + } +} diff --git a/src/StringTools.Benchmark/StringTools.Benchmark.csproj b/src/StringTools.Benchmark/StringTools.Benchmark.csproj new file mode 100644 index 00000000000..eb1bf1347f3 --- /dev/null +++ b/src/StringTools.Benchmark/StringTools.Benchmark.csproj @@ -0,0 +1,22 @@ + + + Exe + false + $(RuntimeOutputTargetFrameworks) + $(RuntimeOutputPlatformTarget) + + false + true + + StringTools.Benchmark + Microsoft.NET.StringTools.Benchmark.Program + + + + + + + + + + diff --git a/src/StringTools.UnitTests/InterningTestData.cs b/src/StringTools.UnitTests/InterningTestData.cs new file mode 100644 index 00000000000..857e9032850 --- /dev/null +++ b/src/StringTools.UnitTests/InterningTestData.cs @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Collections.Generic; + +namespace Microsoft.NET.StringTools.Tests +{ + public static class InterningTestData + { + /// + /// Represents an array of string fragments to initialize an InternableString with. 
+ /// + public class TestDatum + { + private string _string; + public string[] Fragments { get; } + + public int Length => _string.Length; + + public TestDatum(params string[] fragments) + { + Fragments = fragments; + _string = string.Join(string.Empty, Fragments); + } + + public char this[int index] => _string[index]; + + public override string ToString() + { + return _string; + } + } + + public static IEnumerable TestData + { + get + { + yield return new object[] { new TestDatum((string)null) }; + yield return new object[] { new TestDatum("") }; + yield return new object[] { new TestDatum("Test") }; + yield return new object[] { new TestDatum(null, "All") }; + yield return new object[] { new TestDatum("", "All") }; + yield return new object[] { new TestDatum("", "All", "") }; + yield return new object[] { new TestDatum("Test", "All", "The", "Things") }; + } + } + + public static IEnumerable TestDataForTrim + { + get + { + yield return new object[] { new TestDatum((string)null) }; + yield return new object[] { new TestDatum("") }; + yield return new object[] { new TestDatum(" ") }; + yield return new object[] { new TestDatum(" ") }; + yield return new object[] { new TestDatum(null, "") }; + yield return new object[] { new TestDatum(null, " ") }; + yield return new object[] { new TestDatum(" T ") }; + yield return new object[] { new TestDatum(" Test ") }; + yield return new object[] { new TestDatum(null, " Test ") }; + yield return new object[] { new TestDatum(null, " Test All ") }; + yield return new object[] { new TestDatum(" ", " Test", "", "All ", " ") }; + yield return new object[] { new TestDatum("Test", " ", "", " ", " ") }; + yield return new object[] { new TestDatum("Test", " All ", " The ", "Things") }; + } + } + } +} diff --git a/src/StringTools.UnitTests/SpanBasedStringBuilder_Tests.cs b/src/StringTools.UnitTests/SpanBasedStringBuilder_Tests.cs new file mode 100644 index 00000000000..9be63b4b714 --- /dev/null +++ 
b/src/StringTools.UnitTests/SpanBasedStringBuilder_Tests.cs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +#if NET35_UNITTEST +extern alias StringToolsNet35; +#endif + +using System.Collections.Generic; + +using Shouldly; +using Xunit; + +#if NET35_UNITTEST +using StringToolsNet35::Microsoft.NET.StringTools; +#endif + +namespace Microsoft.NET.StringTools.Tests +{ + public class SpanBasedStringBuilder_Tests + { + private SpanBasedStringBuilder MakeSpanBasedStringBuilder(InterningTestData.TestDatum datum, bool appendSubStrings = false) + { + bool wrapFirstFragment = datum.Fragments.Length > 0 && datum.Fragments[0] != null; + + SpanBasedStringBuilder stringBuilder = wrapFirstFragment + ? new SpanBasedStringBuilder(datum.Fragments[0]) + : new SpanBasedStringBuilder(); + + for (int i = 1; i < datum.Fragments.Length; i++) + { + if (appendSubStrings) + { + int index = datum.Fragments[i].Length / 2; + stringBuilder.Append(datum.Fragments[i], 0, index); + stringBuilder.Append(datum.Fragments[i], index, datum.Fragments[i].Length - index); + } + else + { + stringBuilder.Append(datum.Fragments[i]); + } + } + return stringBuilder; + } + + public static IEnumerable TestData => InterningTestData.TestData; + public static IEnumerable TestDataForTrim => InterningTestData.TestDataForTrim; + + [Theory] + [MemberData(nameof(TestData))] + public void LengthReturnsLength(InterningTestData.TestDatum datum) + { + MakeSpanBasedStringBuilder(datum).Length.ShouldBe(datum.Length); + } + + [Theory] + [MemberData(nameof(TestData))] + public void EnumeratorEnumeratesCharacters(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum); + int index = 0; + foreach (char ch in stringBuilder) + { + ch.ShouldBe(datum[index]); + index++; + } + } + + [Theory] + [MemberData(nameof(TestData))] + public void 
EqualsReturnsExpectedValue(InterningTestData.TestDatum datum) + { + InternableString internableString = new InternableString(MakeSpanBasedStringBuilder(datum)); + internableString.Equals(string.Empty).ShouldBe(internableString.Length == 0); + + string substr = datum.Fragments[0] ?? string.Empty; + internableString.Equals(substr).ShouldBe(substr.Length == internableString.Length); + + if (datum.Fragments.Length > 1) + { + substr += datum.Fragments[1]; + internableString.Equals(substr).ShouldBe(substr.Length == internableString.Length); + + internableString.Equals(datum.ToString()).ShouldBeTrue(); + } + + internableString.Equals("Things").ShouldBeFalse(); + } + + [Fact] + public void ReferenceEqualsReturnsExpectedValue() + { + string str = "Test"; + InternableString internableString = new InternableString(str); + internableString.ReferenceEquals(str).ShouldBeTrue(); + internableString = new InternableString(new string(str.ToCharArray())); + internableString.ReferenceEquals(str).ShouldBeFalse(); + } + + [Theory] + [MemberData(nameof(TestData))] + public void AppendAppendsString(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum, false); + new InternableString(stringBuilder).ExpensiveConvertToString().ShouldBe(datum.ToString()); + } + + [Theory] + [MemberData(nameof(TestData))] + public void AppendAppendsSubstring(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum, true); + new InternableString(stringBuilder).ExpensiveConvertToString().ShouldBe(datum.ToString()); + } + +#if !NET35_UNITTEST + [Theory] + [MemberData(nameof(TestDataForTrim))] + public void TrimStartRemovesLeadingWhiteSpace(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum); + stringBuilder.TrimStart(); + new InternableString(stringBuilder).ExpensiveConvertToString().ShouldBe(datum.ToString().TrimStart()); + } + + [Theory] 
+ [MemberData(nameof(TestDataForTrim))] + public void TrimEndRemovesTrailingWhiteSpace(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum); + stringBuilder.TrimEnd(); + new InternableString(stringBuilder).ExpensiveConvertToString().ShouldBe(datum.ToString().TrimEnd()); + } + + [Theory] + [MemberData(nameof(TestDataForTrim))] + public void TrimRemovesLeadingAndTrailingWhiteSpace(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum); + stringBuilder.Trim(); + new InternableString(stringBuilder).ExpensiveConvertToString().ShouldBe(datum.ToString().Trim()); + } +#endif + + [Theory] + [MemberData(nameof(TestData))] + public void ClearRemovesAllCharacters(InterningTestData.TestDatum datum) + { + SpanBasedStringBuilder stringBuilder = MakeSpanBasedStringBuilder(datum); + stringBuilder.Clear(); + stringBuilder.Length.ShouldBe(0); + stringBuilder.GetEnumerator().MoveNext().ShouldBeFalse(); + } + } +} diff --git a/src/StringTools.UnitTests/StringTools.UnitTests.csproj b/src/StringTools.UnitTests/StringTools.UnitTests.csproj new file mode 100644 index 00000000000..e11fc1d60ce --- /dev/null +++ b/src/StringTools.UnitTests/StringTools.UnitTests.csproj @@ -0,0 +1,30 @@ + + + $(RuntimeOutputTargetFrameworks) + $(RuntimeOutputPlatformTarget) + + false + + Microsoft.NET.StringTools.UnitTests + true + true + + + + + + + + + + + + + App.config + Designer + + + PreserveNewest + + + diff --git a/src/StringTools.UnitTests/StringTools.UnitTests.net35.csproj b/src/StringTools.UnitTests/StringTools.UnitTests.net35.csproj new file mode 100644 index 00000000000..0c10b4d1f04 --- /dev/null +++ b/src/StringTools.UnitTests/StringTools.UnitTests.net35.csproj @@ -0,0 +1,40 @@ + + + + + + + $(FullFrameworkTFM) + $(RuntimeOutputPlatformTarget) + + false + + Microsoft.NET.StringTools.net35.UnitTests + true + true + $(DefineConstants);NET35_UNITTEST + + + + + + + + + + + 
TargetFramework=net35 + + + + + + App.config + Designer + + + PreserveNewest + + + diff --git a/src/StringTools.UnitTests/StringTools_Tests.cs b/src/StringTools.UnitTests/StringTools_Tests.cs new file mode 100644 index 00000000000..7f396ab32b6 --- /dev/null +++ b/src/StringTools.UnitTests/StringTools_Tests.cs @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +#if NET35_UNITTEST +extern alias StringToolsNet35; +#endif + +using System; + +using Shouldly; +using Xunit; + +#if NET35_UNITTEST +using StringToolsNet35::Microsoft.NET.StringTools; +using Shouldly.Configuration; +#else +using Microsoft.NET.StringTools; +#endif + +namespace Microsoft.NET.StringTools.Tests +{ + public class StringTools_Tests + { + [Theory] + [InlineData("")] + [InlineData("A")] + [InlineData("Hello")] + [InlineData("HelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHelloHello")] + public void InternsStrings(string str) + { + string internedString1 = Strings.WeakIntern(str); + internedString1.Equals(str).ShouldBeTrue(); + string internedString2 = Strings.WeakIntern(str); + internedString2.Equals(str).ShouldBeTrue(); + Object.ReferenceEquals(internedString1, internedString2).ShouldBeTrue(); + +#if !NET35_UNITTEST + ReadOnlySpan span = str.AsSpan(); + internedString1 = Strings.WeakIntern(span); + internedString1.Equals(str).ShouldBeTrue(); + internedString2 = Strings.WeakIntern(span); + internedString2.Equals(str).ShouldBeTrue(); + Object.ReferenceEquals(internedString1, internedString2).ShouldBeTrue(); +#endif + } + + [Fact] + public void CreatesDiagnosticReport() + { + string statisticsNotEnabledString = "EnableStatisticsGathering() has not been called"; + + Strings.CreateDiagnosticReport().ShouldContain(statisticsNotEnabledString); + + Strings.EnableDiagnostics(); + string report = Strings.CreateDiagnosticReport(); + +
report.ShouldNotContain(statisticsNotEnabledString); + report.ShouldContain("Eliminated Strings"); + } + } +} diff --git a/src/Build.UnitTests/WeakStringCache_Tests.cs b/src/StringTools.UnitTests/WeakStringCache_Tests.cs similarity index 87% rename from src/Build.UnitTests/WeakStringCache_Tests.cs rename to src/StringTools.UnitTests/WeakStringCache_Tests.cs index ece79899fd0..bddfc60917b 100644 --- a/src/Build.UnitTests/WeakStringCache_Tests.cs +++ b/src/StringTools.UnitTests/WeakStringCache_Tests.cs @@ -1,6 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. +#if NET35_UNITTEST +extern alias StringToolsNet35; +#endif + using System; using System.Linq; using System.Runtime.CompilerServices; @@ -9,7 +13,11 @@ using Shouldly; using Xunit; -namespace Microsoft.Build.UnitTests +#if NET35_UNITTEST +using StringToolsNet35::Microsoft.NET.StringTools; +#endif + +namespace Microsoft.NET.StringTools.Tests { public class WeakStringCache_Tests : IDisposable { @@ -35,11 +43,11 @@ private int AddString(string strPart1, string strPart2, Action callbackT { // Compose the string with SB so it doesn't get interned by the runtime. 
string testString = new StringBuilder(strPart1).Append(strPart2).ToString(); - StringInternTarget testStringTarget = new StringInternTarget(testString); + InternableString testStringTarget = new InternableString(testString); - int hashCode = WeakStringCache.GetInternableHashCode(testStringTarget); + int hashCode = testStringTarget.GetHashCode(); - string cachedString = _cache.GetOrCreateEntry(testStringTarget, out bool cacheHit); + string cachedString = _cache.GetOrCreateEntry(ref testStringTarget, out bool cacheHit); cacheHit.ShouldBeFalse(); cachedString.ShouldBeSameAs(testString); @@ -47,7 +55,8 @@ private int AddString(string strPart1, string strPart2, Action callbackT // Verify that the string is really in the cache and the cache returns the interned instance. string testStringCopy = new StringBuilder(strPart1).Append(strPart2).ToString(); - cachedString = _cache.GetOrCreateEntry(new StringInternTarget(testStringCopy), out cacheHit); + InternableString testStringCopyTarget = new InternableString(testStringCopy); + cachedString = _cache.GetOrCreateEntry(ref testStringCopyTarget, out cacheHit); cacheHit.ShouldBeTrue(); cachedString.ShouldBeSameAs(testString); @@ -56,7 +65,8 @@ private int AddString(string strPart1, string strPart2, Action callbackT callbackToRunWithTheStringAlive(cachedString); - cachedString = _cache.GetOrCreateEntry(new StringInternTarget(testStringCopy), out cacheHit); + testStringCopyTarget = new InternableString(testStringCopy); + cachedString = _cache.GetOrCreateEntry(ref testStringCopyTarget, out cacheHit); cacheHit.ShouldBeTrue(); cachedString.ShouldBeSameAs(testString); @@ -95,8 +105,8 @@ private void AddStringsWithSameHashCode(int numberOfStrings) // There are no cache hits when iterating over our strings again because the last one always wins and steals the slot. 
for (int i = 0; i < numberOfStrings; i++) { - StringBuilder sb = new StringBuilder(cachedStrings[i]); - string cachedStringFromCache =_cache.GetOrCreateEntry(new StringBuilderInternTarget(sb), out bool cacheHit); + InternableString stringCopy = new InternableString(new string(cachedStrings[i].ToCharArray())); + string cachedStringFromCache =_cache.GetOrCreateEntry(ref stringCopy, out bool cacheHit); cacheHit.ShouldBeFalse(); cachedStringFromCache.ShouldNotBeSameAs(cachedStrings[i]); } @@ -113,7 +123,7 @@ private void AddStringsWithSameHashCode(int numberOfStrings) /// https://www.mono-project.com/docs/advanced/garbage-collector/sgen/#precise-stack-marking /// [Fact] - [SkipOnTargetFramework(TargetFrameworkMonikers.Mono, "doesn't play well with conservative GC scanning")] + [Trait("Category", "mono-osx-failing")] public void RetainsStringUntilCollected() { // Add a string to the cache using a non-inlinable method to make sure it's not reachable from a GC root. diff --git a/src/StringTools/AssemblyInfo.cs b/src/StringTools/AssemblyInfo.cs new file mode 100644 index 00000000000..0a8c0ee0a72 --- /dev/null +++ b/src/StringTools/AssemblyInfo.cs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +[assembly: InternalsVisibleTo("Microsoft.NET.StringTools.UnitTests, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")] +[assembly: InternalsVisibleTo("Microsoft.NET.StringTools.net35.UnitTests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] +[assembly: InternalsVisibleTo("Microsoft.NET.StringTools.Benchmark, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] + +[assembly: ComVisible(false)] +[assembly: CLSCompliant(true)] diff --git a/src/StringTools/InternableString.Simple.cs b/src/StringTools/InternableString.Simple.cs new file mode 100644 index 00000000000..88126da5c6f --- /dev/null +++ b/src/StringTools/InternableString.Simple.cs @@ -0,0 +1,231 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq.Expressions; +using System.Text; + +namespace System +{ + /// + /// A bare minimum and inefficient version of MemoryExtensions as provided in System.Memory on .NET 4.5. + /// + public static class MemoryExtensions + { + public static string AsSpan(this T[] array, int start, int length) + { + if (array is char[] charArray) + { + return new string(charArray, start, length); + } + throw new ArgumentException("Only char[] arrays are supported.", nameof(array)); + } + } +} + +namespace Microsoft.NET.StringTools +{ + /// + /// Represents a string that can be converted to System.String with interning, i.e. by returning an existing string if it has been seen before + /// and is still tracked in the intern table. + /// + /// + /// This is a simple and inefficient implementation compatible with .NET Framework 3.5. + /// + internal ref struct InternableString + { + /// + /// Enumerator for the top-level struct. Enumerates characters of the string. + /// + public ref struct Enumerator + { + /// + /// The InternableString being enumerated. + /// + private InternableString _string; + + /// + /// Index of the current character, -1 if MoveNext has not been called yet. + /// + private int _charIndex; + + public Enumerator(ref InternableString spanBuilder) + { + _string = spanBuilder; + _charIndex = -1; + } + + /// + /// Returns the current character. + /// + public char Current => (_string._builder == null ? _string.FirstString[_charIndex] : _string._builder[_charIndex]); + + /// + /// Moves to the next character. + /// + /// True if there is another character, false if the enumerator reached the end. + public bool MoveNext() + { + int newIndex = _charIndex + 1; + if (newIndex < _string.Length) + { + _charIndex = newIndex; + return true; + } + return false; + } + } + + /// + /// If this instance wraps a StringBuilder, it uses this backing field. + /// + private StringBuilder?
_builder; + + /// + /// If this instance represents one contiguous string, it may be held in this field. + /// + private string? _firstString; + + /// + /// A convenience getter to ensure that we always operate on a non-null string. + /// + private string FirstString => _firstString ?? string.Empty; + + /// + /// Constructs a new InternableString wrapping the given string. + /// + /// The string to wrap, must be non-null. + internal InternableString(string str) + { + if (str == null) + { + throw new ArgumentNullException(nameof(str)); + } + _builder = null; + _firstString = str; + } + + /// + /// Constructs a new InternableString wrapping the given SpanBasedStringBuilder. + /// + internal InternableString(SpanBasedStringBuilder builder) + { + _builder = builder.Builder; + _firstString = null; + } + + /// + /// Gets the length of the string. + /// + public int Length => (_builder == null ? FirstString.Length : _builder.Length); + + /// + /// Creates a new enumerator for enumerating characters in this string. Does not allocate. + /// + /// The enumerator. + public Enumerator GetEnumerator() + { + return new Enumerator(ref this); + } + + /// + /// Returns true if the string is equal to another string by ordinal comparison. + /// + /// Another string. + /// True if this string is equal to . + public bool Equals(string other) + { + if (other.Length != Length) + { + return false; + } + + if (_firstString != null) + { + return _firstString.Equals(other); + } + if (_builder != null) + { + for (int i = 0; i < other.Length; i++) + { + // Note: This indexing into the StringBuilder could be O(N). We prefer it over allocating + // a new string with ToString(). + if (other[i] != _builder[i]) + { + return false; + } + } + } + return true; + } + + /// + /// Returns a System.String representing this string. Allocates memory unless this InternableString was created by wrapping a + /// System.String in which case the original string is returned. + /// + /// The string. 
+ public string ExpensiveConvertToString() + { + // Special case: if we hold just one string, we can directly return it. + if (_firstString != null) + { + return _firstString; + } + return _builder?.ToString() ?? string.Empty; + } + + /// + /// Returns true if this InternableString wraps a System.String and the same System.String is passed as the argument. + /// + /// The string to compare to. + /// True if this instance wraps the given string. + public bool ReferenceEquals(string str) + { + return Object.ReferenceEquals(str, _firstString); + } + + /// + /// Converts this instance to a System.String while first searching for a match in the intern table. + /// + /// + /// May allocate depending on whether the string has already been interned. + /// + public override unsafe string ToString() + { + return WeakStringCacheInterner.Instance.InternableToString(ref this); + } + + /// + /// Implements the simple yet very decently performing djb2 hash function (xor version). + /// + /// A stable hashcode of the string represented by this instance. + public override int GetHashCode() + { + int hashCode = 5381; + + if (_firstString != null) + { + foreach (char ch in _firstString) + { + unchecked + { + hashCode = hashCode * 33 ^ ch; + } + } + } + else if (_builder != null) + { + for (int i = 0; i < _builder.Length; i++) + { + unchecked + { + hashCode = hashCode * 33 ^ _builder[i]; + } + } + } + return hashCode; + } + } +} diff --git a/src/StringTools/InternableString.cs b/src/StringTools/InternableString.cs new file mode 100644 index 00000000000..ca8fa75ef48 --- /dev/null +++ b/src/StringTools/InternableString.cs @@ -0,0 +1,331 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace Microsoft.NET.StringTools +{ + /// + /// Represents a string that can be converted to System.String with interning, i.e. by returning an existing string if it has been seen before + /// and is still tracked in the intern table. + /// + internal ref struct InternableString + { + /// + /// Enumerator for the top-level struct. Enumerates characters of the string. + /// + public ref struct Enumerator + { + /// + /// The InternableString being enumerated. + /// + private InternableString _string; + + /// + /// Index of the current span, -1 represents the inline span. + /// + private int _spanIndex; + + /// + /// Index of the current character in the current span, -1 if MoveNext has not been called yet. + /// + private int _charIndex; + + internal Enumerator(ref InternableString str) + { + _string = str; + _spanIndex = -1; + _charIndex = -1; + } + + /// + /// Returns the current character. + /// + public ref readonly char Current + { + get + { + if (_spanIndex == -1) + { + return ref _string._inlineSpan[_charIndex]; + } + ReadOnlyMemory span = _string._spans![_spanIndex]; + return ref span.Span[_charIndex]; + } + } + + /// + /// Moves to the next character. + /// + /// True if there is another character, false if the enumerator reached the end. + public bool MoveNext() + { + int newCharIndex = _charIndex + 1; + if (_spanIndex == -1) + { + if (newCharIndex < _string._inlineSpan.Length) + { + _charIndex = newCharIndex; + return true; + } + _spanIndex = 0; + newCharIndex = 0; + } + + if (_string._spans != null) + { + while (_spanIndex < _string._spans.Count) + { + if (newCharIndex < _string._spans[_spanIndex].Length) + { + _charIndex = newCharIndex; + return true; + } + _spanIndex++; + newCharIndex = 0; + } + } + return false; + } + } + + /// + /// The span held by this struct, inline to be able to represent . May be empty. 
+ /// + private readonly ReadOnlySpan _inlineSpan; + +#if NETSTANDARD + /// + /// .NET Core does not keep a reference to the containing object in . In particular, + /// it cannot recover the string if the span represents one. We have to hold the reference separately to be able to + /// roundtrip String->InternableString->String without allocating a new String. + /// + private string? _inlineSpanString; +#endif + + /// + /// Additional spans held by this struct. May be null. + /// + private List>? _spans; + + /// + /// Constructs a new InternableString wrapping the given . + /// + /// The span to wrap. + /// + /// When wrapping a span representing an entire System.String, use Internable(string) for optimum performance. + /// + internal InternableString(ReadOnlySpan span) + { + _inlineSpan = span; + _spans = null; + Length = span.Length; +#if NETSTANDARD + _inlineSpanString = null; +#endif + } + + /// + /// Constructs a new InternableString wrapping the given string. + /// + /// The string to wrap, must be non-null. + internal InternableString(string str) + { + if (str == null) + { + throw new ArgumentNullException(nameof(str)); + } + + _inlineSpan = str.AsSpan(); + _spans = null; + Length = str.Length; +#if NETSTANDARD + _inlineSpanString = str; +#endif + } + + /// + /// Constructs a new InternableString wrapping the given SpanBasedStringBuilder. + /// + internal InternableString(SpanBasedStringBuilder stringBuilder) + { + _inlineSpan = default(ReadOnlySpan); + _spans = stringBuilder.Spans; + Length = stringBuilder.Length; +#if NETSTANDARD + _inlineSpanString = null; +#endif + } + + /// + /// Gets the length of the string. + /// + public int Length { get; private set; } + + /// + /// Creates a new enumerator for enumerating characters in this string. Does not allocate. + /// + /// The enumerator. + public Enumerator GetEnumerator() + { + return new Enumerator(ref this); + } + + /// + /// Returns true if the string is equal to another string by ordinal comparison. 
+ /// + /// Another string. + /// True if this string is equal to . + public bool Equals(string other) + { + if (other.Length != Length) + { + return false; + } + + if (_inlineSpan.SequenceCompareTo(other.AsSpan(0, _inlineSpan.Length)) != 0) + { + return false; + } + + if (_spans != null) + { + int otherStart = _inlineSpan.Length; + foreach (ReadOnlyMemory span in _spans) + { + if (span.Span.SequenceCompareTo(other.AsSpan(otherStart, span.Length)) != 0) + { + return false; + } + otherStart += span.Length; + } + } + return true; + } + + /// + /// Returns a System.String representing this string. Allocates memory unless this InternableString was created by wrapping a + /// System.String in which case the original string is returned. + /// + /// The string. + public unsafe string ExpensiveConvertToString() + { + if (Length == 0) + { + return string.Empty; + } + + // Special case: if we hold just one string, we can directly return it. + if (_inlineSpan.Length == Length) + { +#if NETSTANDARD + if (_inlineSpanString != null) + { + return _inlineSpanString; + } +#else + return _inlineSpan.ToString(); +#endif + } + if (_inlineSpan.IsEmpty && _spans?[0].Length == Length) + { + return _spans[0].ToString(); + } + + // In all other cases we create a new string instance and concatenate all spans into it. Note that while technically mutating + // the System.String, the technique is generally considered safe as we are the sole owners of the new object. It is important + // to initialize the string with the '\0' characters as this hits an optimized code path in the runtime. 
+ string result = new string((char)0, Length); + + fixed (char* resultPtr = result) + { + char* destPtr = resultPtr; + if (!_inlineSpan.IsEmpty) + { + fixed (char* sourcePtr = _inlineSpan) + { + Unsafe.CopyBlockUnaligned(destPtr, sourcePtr, 2 * (uint)_inlineSpan.Length); + } + destPtr += _inlineSpan.Length; + } + + if (_spans != null) + { + foreach (ReadOnlyMemory span in _spans) + { + if (!span.IsEmpty) + { + fixed (char* sourcePtr = span.Span) + { + Unsafe.CopyBlockUnaligned(destPtr, sourcePtr, 2 * (uint)span.Length); + } + destPtr += span.Length; + } + } + } + } + return result; + } + + /// + /// Returns true if this InternableString wraps a System.String and the same System.String is passed as the argument. + /// + /// The string to compare to. + /// True if this instance wraps the given string. + public bool ReferenceEquals(string str) + { + if (_inlineSpan.Length == Length) + { + return _inlineSpan == str.AsSpan(); + } + if (_inlineSpan.IsEmpty && _spans?.Count == 1 && _spans[0].Length == Length) + { + return _spans[0].Span == str.AsSpan(); + } + return false; + } + + /// + /// Converts this instance to a System.String while first searching for a match in the intern table. + /// + /// + /// May allocate depending on whether the string has already been interned. + /// + public override string ToString() + { + return WeakStringCacheInterner.Instance.InternableToString(ref this); + } + + /// + /// Implements the simple yet very decently performing djb2 hash function (xor version). + /// + /// A stable hashcode of the string represented by this instance.
+ public override unsafe int GetHashCode() + { + int hashCode = 5381; + fixed (char* charPtr = _inlineSpan) + { + for (int i = 0; i < _inlineSpan.Length; i++) + { + hashCode = unchecked(hashCode * 33 ^ charPtr[i]); + } + } + if (_spans != null) + { + foreach (ReadOnlyMemory span in _spans) + { + fixed (char* charPtr = span.Span) + { + for (int i = 0; i < span.Length; i++) + { + hashCode = unchecked(hashCode * 33 ^ charPtr[i]); + } + } + } + } + return hashCode; + } + } +} diff --git a/src/StringTools/SpanBasedStringBuilder.Simple.cs b/src/StringTools/SpanBasedStringBuilder.Simple.cs new file mode 100644 index 00000000000..86e38c2907f --- /dev/null +++ b/src/StringTools/SpanBasedStringBuilder.Simple.cs @@ -0,0 +1,153 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Text; + +namespace Microsoft.NET.StringTools +{ + /// + /// A simple version of SpanBasedStringBuilder to be used on .NET Framework 3.5. Wraps a . + /// + public class SpanBasedStringBuilder : IDisposable + { + /// + /// Enumerator for the top-level struct. Enumerates characters of the string. + /// + public struct Enumerator + { + /// + /// The StringBuilder being enumerated. + /// + private StringBuilder _builder; + + /// + /// Index of the current character, -1 if MoveNext has not been called yet. + /// + private int _charIndex; + + public Enumerator(StringBuilder builder) + { + _builder = builder; + _charIndex = -1; + } + + /// + /// Returns the current character. + /// + public char Current => _builder[_charIndex]; + + /// + /// Moves to the next character. + /// + /// True if there is another character, false if the enumerator reached the end. + public bool MoveNext() + { + int newIndex = _charIndex + 1; + if (newIndex < _builder.Length) + { + _charIndex = newIndex; + return true; + } + return false; + } + } + + /// + /// The backing StringBuilder. 
+ /// + private StringBuilder _builder; + + internal StringBuilder Builder => _builder; + + /// + /// Constructs a new SpanBasedStringBuilder containing the given string. + /// + /// The string to wrap, must be non-null. + public SpanBasedStringBuilder(string str) + : this() + { + if (str == null) + { + throw new ArgumentNullException(nameof(str)); + } + Append(str); + } + + /// + /// Constructs a new empty SpanBasedStringBuilder with the given expected number of spans. + /// + public SpanBasedStringBuilder(int capacity = 4) + { + // Since we're using StringBuilder as the backing store in this implementation, our capacity is expressed + // in number of characters rather than number of spans. We use 128 as a reasonable expected multiplier to + // go from one to the other, i.e. by default we'll preallocate a 512-character StringBuilder. + _builder = new StringBuilder(capacity * 128); + } + + /// + /// Gets the length of the string. + /// + public int Length => _builder.Length; + + /// + /// Creates a new enumerator for enumerating characters in this string. Does not allocate. + /// + /// The enumerator. + public Enumerator GetEnumerator() + { + return new Enumerator(_builder); + } + + /// + /// Converts this instance to a System.String while first searching for a match in the intern table. + /// + /// + /// May allocate depending on whether the string has already been interned. + /// + public override string ToString() + { + return new InternableString(this).ToString(); + } + + /// + /// Releases this instance. + /// + public void Dispose() + { + Strings.ReturnSpanBasedStringBuilder(this); + } + + #region Public mutating methods + + /// + /// Appends a string. + /// + /// The string to append. + internal void Append(string value) + { + _builder.Append(value); + } + + /// + /// Appends a substring. + /// + /// The string to append. + /// The start index of the substring within to append. + /// The length of the substring to append. 
+ internal void Append(string value, int startIndex, int count) + { + _builder.Append(value, startIndex, count); + } + + /// + /// Clears this instance making it represent an empty string. + /// + public void Clear() + { + _builder.Length = 0; + } + + #endregion + } +} diff --git a/src/StringTools/SpanBasedStringBuilder.cs b/src/StringTools/SpanBasedStringBuilder.cs new file mode 100644 index 00000000000..2d388641f85 --- /dev/null +++ b/src/StringTools/SpanBasedStringBuilder.cs @@ -0,0 +1,261 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; + +namespace Microsoft.NET.StringTools +{ + /// + /// A StringBuilder replacement that keeps a list of spans making up the intermediate string rather + /// than a copy of its characters. This has positive impact on both memory (no need to allocate space for the intermediate string) + /// and time (no need to copy characters to the intermediate string). + /// + /// + /// The method tries to intern the resulting string without even allocating it if it's already interned. + /// Use to take advantage of pooling to eliminate allocation overhead of this class. + /// + public class SpanBasedStringBuilder : IDisposable + { + /// + /// Enumerator for the top-level class. Enumerates characters of the string. + /// + public struct Enumerator + { + /// + /// The spans being enumerated. + /// + private readonly List> _spans; + + /// + /// Index of the current span. + /// + private int _spanIndex; + + /// + /// Index of the current character in the current span, -1 if MoveNext has not been called yet. + /// + private int _charIndex; + + internal Enumerator(List> spans) + { + _spans = spans; + _spanIndex = 0; + _charIndex = -1; + } + + /// + /// Returns the current character. 
+ /// + public readonly char Current + { + get + { + ReadOnlyMemory span = _spans[_spanIndex]; + return span.Span[_charIndex]; + } + } + + /// + /// Moves to the next character. + /// + /// True if there is another character, false if the enumerator reached the end. + public bool MoveNext() + { + int newCharIndex = _charIndex + 1; + while (_spanIndex < _spans.Count) + { + if (newCharIndex < _spans[_spanIndex].Length) + { + _charIndex = newCharIndex; + return true; + } + _spanIndex++; + newCharIndex = 0; + } + return false; + } + } + + /// + /// Spans making up the rope. + /// + private readonly List> _spans; + + /// + /// Internal getter to get the list of spans out of the SpanBasedStringBuilder. + /// + internal List> Spans => _spans; + + /// + /// Constructs a new SpanBasedStringBuilder containing the given string. + /// + /// The string to wrap, must be non-null. + public SpanBasedStringBuilder(string str) + : this() + { + if (str == null) + { + throw new ArgumentNullException(nameof(str)); + } + Append(str); + } + + /// + /// Constructs a new empty SpanBasedStringBuilder with the given expected number of spans. + /// + public SpanBasedStringBuilder(int capacity = 4) + { + _spans = new List>(capacity); + Length = 0; + } + + /// + /// Gets the length of the string. + /// + public int Length { get; private set; } + + /// + /// Gets the capacity of the SpanBasedStringBuilder in terms of number of spans it can hold without allocating. + /// + public int Capacity => _spans.Capacity; + + /// + /// Creates a new enumerator for enumerating characters in this string. Does not allocate. + /// + /// The enumerator. + public Enumerator GetEnumerator() + { + return new Enumerator(_spans); + } + + /// + /// Converts this instance to a System.String while first searching for a match in the intern table. + /// + /// + /// May allocate depending on whether the string has already been interned. 
+ /// + public override string ToString() + { + return new InternableString(this).ToString(); + } + + /// + /// Releases this instance. + /// + public void Dispose() + { + Strings.ReturnSpanBasedStringBuilder(this); + } + + #region Public mutating methods + + /// + /// Appends a string. + /// + /// The string to append. + public void Append(string value) + { + if (!string.IsNullOrEmpty(value)) + { + _spans.Add(value.AsMemory()); + Length += value.Length; + } + } + + /// + /// Appends a substring. + /// + /// The string to append. + /// The start index of the substring within to append. + /// The length of the substring to append. + public void Append(string value, int startIndex, int count) + { + if (value != null) + { + if (count > 0) + { + _spans.Add(value.AsMemory(startIndex, count)); + Length += count; + } + } + else + { + if (startIndex != 0 || count != 0) + { + throw new ArgumentNullException(nameof(value)); + } + } + } + + /// + /// Removes leading white-space characters from the string. + /// + public void TrimStart() + { + for (int spanIdx = 0; spanIdx < _spans.Count; spanIdx++) + { + ReadOnlySpan span = _spans[spanIdx].Span; + int i = 0; + while (i < span.Length && char.IsWhiteSpace(span[i])) + { + i++; + } + if (i > 0) + { + _spans[spanIdx] = _spans[spanIdx].Slice(i); + Length -= i; + } + if (!_spans[spanIdx].IsEmpty) + { + return; + } + } + } + + /// + /// Removes trailing white-space characters from the string. + /// + public void TrimEnd() + { + for (int spanIdx = _spans.Count - 1; spanIdx >= 0; spanIdx--) + { + ReadOnlySpan span = _spans[spanIdx].Span; + int i = span.Length - 1; + while (i >= 0 && char.IsWhiteSpace(span[i])) + { + i--; + } + if (i + 1 < span.Length) + { + _spans[spanIdx] = _spans[spanIdx].Slice(0, i + 1); + Length -= span.Length - (i + 1); + } + if (!_spans[spanIdx].IsEmpty) + { + return; + } + } + } + + /// + /// Removes leading and trailing white-space characters from the string. 
+ /// + public void Trim() + { + TrimStart(); + TrimEnd(); + } + + /// + /// Clears this instance making it represent an empty string. + /// + public void Clear() + { + _spans.Clear(); + Length = 0; + } + + #endregion + } +} diff --git a/src/StringTools/StringTools.cs b/src/StringTools/StringTools.cs new file mode 100644 index 00000000000..fbe794342b9 --- /dev/null +++ b/src/StringTools/StringTools.cs @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; + +namespace Microsoft.NET.StringTools +{ + public static class Strings + + { + #region Fields + + /// + /// Per-thread instance of the SpanBasedStringBuilder, created lazily. + /// + /// + /// This field serves as a per-thread one-item object pool, which is adequate for most use + /// cases as the builder is not expected to be held for extended periods of time. + /// + [ThreadStatic] + private static SpanBasedStringBuilder? _spanBasedStringBuilder; + + #endregion + + #region Public methods + + /// + /// Interns the given string, keeping only a weak reference to the returned value. + /// + /// The string to intern. + /// A string equal to str, could be the same object as str. + /// + /// The intern pool does not retain strong references to the strings it's holding so strings are automatically evicted + /// after they become unrooted. This is in contrast to System.String.Intern which holds strings forever. + /// + public static string WeakIntern(string str) + { + InternableString internableString = new InternableString(str); + return WeakStringCacheInterner.Instance.InternableToString(ref internableString); + } + +#if !NET35 + /// + /// Interns the given readonly span of characters, keeping only a weak reference to the returned value. + /// + /// The character span to intern. + /// A string equal to str, could be the result of calling ToString() on str. 
+ /// + /// The intern pool does not retain strong references to the strings it's holding so strings are automatically evicted + /// after they become unrooted. This is in contrast to System.String.Intern which holds strings forever. + /// + public static string WeakIntern(ReadOnlySpan str) + { + InternableString internableString = new InternableString(str); + return WeakStringCacheInterner.Instance.InternableToString(ref internableString); + } +#endif + + /// + /// Returns a new or recycled SpanBasedStringBuilder. + /// + /// The SpanBasedStringBuilder. + /// + /// Call Dispose on the returned instance to recycle it. + /// + public static SpanBasedStringBuilder GetSpanBasedStringBuilder() + { + SpanBasedStringBuilder? stringBuilder = _spanBasedStringBuilder; + if (stringBuilder == null) + { + return new SpanBasedStringBuilder(); + } + else + { + _spanBasedStringBuilder = null; + return stringBuilder; + } + } + + /// + /// Enables diagnostics in the interner. Call CreateDiagnosticReport to retrieve the diagnostic data. + /// + public static void EnableDiagnostics() + { + WeakStringCacheInterner.Instance.EnableStatistics(); + } + + /// + /// Retrieves the diagnostic data describing the current state of the interner. Make sure to call EnableDiagnostics beforehand. + /// + public static string CreateDiagnosticReport() + { + return WeakStringCacheInterner.Instance.FormatStatistics(); + } + + #endregion + + /// + /// Returns a SpanBasedStringBuilder instance back to the pool if possible. + /// + /// The instance to return. 
+ internal static void ReturnSpanBasedStringBuilder(SpanBasedStringBuilder stringBuilder) + { + stringBuilder.Clear(); + _spanBasedStringBuilder = stringBuilder; + } + } +} diff --git a/src/StringTools/StringTools.csproj b/src/StringTools/StringTools.csproj new file mode 100644 index 00000000000..f3ac3633595 --- /dev/null +++ b/src/StringTools/StringTools.csproj @@ -0,0 +1,39 @@ + + + $(LibraryTargetFrameworks) + $(LibraryTargetFrameworks);net35 + AnyCPU + true + true + 8.0 + Microsoft.NET.StringTools + true + enable + + 1.0.0 + + true + + Microsoft.NET.StringTools + Microsoft.NET.StringTools.net35 + + + + + + + + + + + + + + + + + + + + + diff --git a/src/StringTools/StringTools.pkgdef b/src/StringTools/StringTools.pkgdef new file mode 100644 index 00000000000..4ca09cf37c9 --- /dev/null +++ b/src/StringTools/StringTools.pkgdef @@ -0,0 +1,7 @@ +[$RootKey$\RuntimeConfiguration\dependentAssembly\bindingRedirection\{7FBCE0AF-48AC-46AC-8841-F00D17C63A22}] +"name"="StringTools" +"codeBase"="$BaseInstallDir$\MSBuild\Current\Bin\Microsoft.NET.StringTools.dll" +"publicKeyToken"="b03f5f7f11d50a3a" +"culture"="neutral" +"oldVersion"="0.0.0.0-1.0.0.0" +"newVersion"="1.0.0.0" diff --git a/src/StringTools/WeakStringCache.Concurrent.cs b/src/StringTools/WeakStringCache.Concurrent.cs new file mode 100644 index 00000000000..6110475e946 --- /dev/null +++ b/src/StringTools/WeakStringCache.Concurrent.cs @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Threading; + +namespace Microsoft.NET.StringTools +{ + /// + /// Implements the WeakStringCache functionality on modern .NET versions where ConcurrentDictionary is available. 
+ /// + internal sealed partial class WeakStringCache : IDisposable + { + private readonly ConcurrentDictionary _stringsByHashCode; + + public WeakStringCache() + { + _stringsByHashCode = new ConcurrentDictionary(Environment.ProcessorCount, _initialCapacity); + } + + /// + /// Main entrypoint of this cache. Tries to look up a string that matches the given internable. If it succeeds, returns + /// the string and sets cacheHit to true. If the string is not found, calls ExpensiveConvertToString on the internable, + /// adds the resulting string to the cache, and returns it, setting cacheHit to false. + /// + /// The internable describing the string we're looking for. + /// true if match found in cache, false otherwise. + /// A string matching the given internable. + public string GetOrCreateEntry(ref InternableString internable, out bool cacheHit) + { + int hashCode = internable.GetHashCode(); + + StringWeakHandle handle; + string? result; + + // Get the existing handle from the cache and lock it while we're dereferencing it to prevent a race with the Scavenge + // method running on another thread and freeing the handle from underneath us. + if (_stringsByHashCode.TryGetValue(hashCode, out handle)) + { + lock (handle) + { + result = handle.GetString(ref internable); + if (result != null) + { + cacheHit = true; + return result; + } + + // We have the handle but it's not referencing the right string - create the right string and store it in the handle. + result = internable.ExpensiveConvertToString(); + handle.SetString(result); + + cacheHit = false; + return result; + } + } + + // We don't have the handle in the cache - create the right string, store it in the handle, and add the handle to the cache. + result = internable.ExpensiveConvertToString(); + + handle = new StringWeakHandle(); + handle.SetString(result); + _stringsByHashCode.TryAdd(hashCode, handle); + + // Remove unused handles if our heuristic indicates that it would be productive. 
+ int scavengeThreshold = _scavengeThreshold; + if (_stringsByHashCode.Count >= scavengeThreshold) + { + // Before we start scavenging set _scavengeThreshold to a high value to effectively lock other threads from + // running Scavenge at the same time. + if (Interlocked.CompareExchange(ref _scavengeThreshold, int.MaxValue, scavengeThreshold) == scavengeThreshold) + { + try + { + // Get rid of unused handles. + Scavenge(); + } + finally + { + // And do this again when the number of handles reaches double the current after-scavenge number. + _scavengeThreshold = _stringsByHashCode.Count * 2; + } + } + } + + cacheHit = false; + return result; + } + + /// + /// Iterates over the cache and removes unused GC handles, i.e. handles that don't reference live strings. + /// This is expensive so try to call such that the cost is amortized to O(1) per GetOrCreateEntry() invocation. + /// + public void Scavenge() + { + foreach (KeyValuePair entry in _stringsByHashCode) + { + // We can safely dereference entry.Value as the caller guarantees that Scavenge runs only on one thread. + if (!entry.Value.IsUsed && _stringsByHashCode.TryRemove(entry.Key, out StringWeakHandle removedHandle)) + { + lock (removedHandle) + { + // Note that the removed handle may be different from the one we got from the enumerator so check again + // and try to put it back if it's still in use. + if (!removedHandle.IsUsed || !_stringsByHashCode.TryAdd(entry.Key, removedHandle)) + { + removedHandle.Free(); + } + } + } + } + } + + /// + /// Returns internal debug counters calculated based on the current state of the cache. 
+ /// + public DebugInfo GetDebugInfo() + { + return GetDebugInfoImpl(); + } + } +} diff --git a/src/MSBuildTaskHost/WeakStringCache.Locking.cs b/src/StringTools/WeakStringCache.Locking.cs similarity index 93% rename from src/MSBuildTaskHost/WeakStringCache.Locking.cs rename to src/StringTools/WeakStringCache.Locking.cs index d26dd984963..47daf7ee824 100644 --- a/src/MSBuildTaskHost/WeakStringCache.Locking.cs +++ b/src/StringTools/WeakStringCache.Locking.cs @@ -4,7 +4,7 @@ using System; using System.Collections.Generic; -namespace Microsoft.Build +namespace Microsoft.NET.StringTools { /// /// Implements the WeakStringCache functionality on .NET Framework 3.5 where ConcurrentDictionary is not available. @@ -25,19 +25,19 @@ public WeakStringCache() /// /// The internable describing the string we're looking for. /// A string matching the given internable. - public string GetOrCreateEntry(T internable, out bool cacheHit) where T : IInternable + public string GetOrCreateEntry(ref InternableString internable, out bool cacheHit) { - int hashCode = GetInternableHashCode(internable); + int hashCode = internable.GetHashCode(); StringWeakHandle handle; - string result; + string? result; bool addingNewHandle = false; lock (_stringsByHashCode) { if (_stringsByHashCode.TryGetValue(hashCode, out handle)) { - result = handle.GetString(internable); + result = handle.GetString(ref internable); if (result != null) { cacheHit = true; @@ -81,7 +81,7 @@ public WeakStringCache() /// private void ScavengeNoLock() { - List keysToRemove = null; + List? 
keysToRemove = null; foreach (KeyValuePair entry in _stringsByHashCode) { if (!entry.Value.IsUsed) diff --git a/src/Shared/WeakStringCache.cs b/src/StringTools/WeakStringCache.cs similarity index 83% rename from src/Shared/WeakStringCache.cs rename to src/StringTools/WeakStringCache.cs index 22021e1a08a..cedde724390 100644 --- a/src/Shared/WeakStringCache.cs +++ b/src/StringTools/WeakStringCache.cs @@ -5,7 +5,7 @@ using System.Collections.Generic; using System.Runtime.InteropServices; -namespace Microsoft.Build +namespace Microsoft.NET.StringTools { /// /// A cache of weak GC handles pointing to strings. Weak GC handles are functionally equivalent to WeakReference's but have less overhead @@ -28,7 +28,7 @@ public struct DebugInfo /// /// Holds a weak GC handle to a string. Shared by all strings with the same hash code and referencing the last such string we've seen. /// - private struct StringWeakHandle + private class StringWeakHandle { /// /// Weak GC handle to the last string of the given hashcode we've seen. @@ -45,12 +45,11 @@ private struct StringWeakHandle /// /// The internable describing the string we're looking for. /// The string matching the internable or null if the handle is referencing a collected string or the string is different. - public string GetString(T internable) where T : IInternable + public string? GetString(ref InternableString internable) { if (WeakHandle.IsAllocated && WeakHandle.Target is string str) { - if (internable.Length == str.Length && - internable.StartsWithStringByOrdinalComparison(str)) + if (internable.Equals(str)) { return str; } @@ -94,28 +93,10 @@ public void Free() /// private int _scavengeThreshold = _initialCapacity; - /// - /// Implements the simple yet very decently performing djb2 hash function (xor version). - /// - /// The internable to compute the hash code for. - /// The 32-bit hash code. 
- internal static int GetInternableHashCode(T internable) where T : IInternable - { - int hashCode = 5381; - for (int i = 0; i < internable.Length; i++) - { - unchecked - { - hashCode = hashCode * 33 ^ internable[i]; - } - } - return hashCode; - } - /// /// Frees all GC handles and clears the cache. /// - public void Dispose() + private void DisposeImpl() { foreach (KeyValuePair entry in _stringsByHashCode) { @@ -124,6 +105,17 @@ public void Dispose() _stringsByHashCode.Clear(); } + public void Dispose() + { + DisposeImpl(); + GC.SuppressFinalize(this); + } + + ~WeakStringCache() + { + DisposeImpl(); + } + /// /// Returns internal debug counters calculated based on the current state of the cache. /// diff --git a/src/StringTools/WeakStringCacheInterner.cs b/src/StringTools/WeakStringCacheInterner.cs new file mode 100644 index 00000000000..34366af3cfc --- /dev/null +++ b/src/StringTools/WeakStringCacheInterner.cs @@ -0,0 +1,180 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; + +namespace Microsoft.NET.StringTools +{ + /// + /// Implements interning based on a WeakStringCache. + /// + internal class WeakStringCacheInterner : IDisposable + { + /// + /// Enumerates the possible interning results. + /// + private enum InternResult + { + FoundInWeakStringCache, + AddedToWeakStringCache, + } + + internal static WeakStringCacheInterner Instance = new WeakStringCacheInterner(); + + /// + /// The cache to keep strings in. + /// + private readonly WeakStringCache _weakStringCache = new WeakStringCache(); + +#region Statistics + /// + /// Number of times the regular interning path found the string in the cache. + /// + private int _regularInternHits; + + /// + /// Number of times the regular interning path added the string to the cache. 
+ /// + private int _regularInternMisses; + + /// + /// Total number of strings eliminated by interning. + /// + private int _internEliminatedStrings; + + /// + /// Total number of chars eliminated across all strings. + /// + private int _internEliminatedChars; + + /// + /// Maps strings that went through the interning path to the number of times they have been + /// seen. The higher the number the better the payoff of interning. Null if statistics + /// gathering has not been enabled. + /// + private Dictionary? _internCallCountsByString; + +#endregion + + /// + /// Try to intern the string. + /// The return value indicates how the string was interned. + /// + private InternResult Intern(ref InternableString candidate, out string interned) + { + interned = _weakStringCache.GetOrCreateEntry(ref candidate, out bool cacheHit); + return cacheHit ? InternResult.FoundInWeakStringCache : InternResult.AddedToWeakStringCache; + } + + /// + /// WeakIntern the given InternableString. + /// + public string InternableToString(ref InternableString candidate) + { + if (candidate.Length == 0) + { + return string.Empty; + } + + InternResult resultForStatistics = Intern(ref candidate, out string internedString); +#if DEBUG + string expectedString = candidate.ExpensiveConvertToString(); + if (!String.Equals(internedString, expectedString)) + { + throw new InvalidOperationException(String.Format("Interned string {0} should have been {1}", internedString, expectedString)); + } +#endif + + if (_internCallCountsByString != null) + { + lock (_internCallCountsByString) + { + switch (resultForStatistics) + { + case InternResult.FoundInWeakStringCache: + _regularInternHits++; + break; + case InternResult.AddedToWeakStringCache: + _regularInternMisses++; + break; + } + + _internCallCountsByString.TryGetValue(internedString, out int priorCount); + _internCallCountsByString[internedString] = priorCount + 1; + + if (!candidate.ReferenceEquals(internedString)) + { + // Reference changed so 
'candidate' is now released and should save memory. + _internEliminatedStrings++; + _internEliminatedChars += candidate.Length; + } + } + } + + return internedString; + } + + /// + /// + /// + public void EnableStatistics() + { + _internCallCountsByString = new Dictionary(); + } + + /// + /// Returns a string with human-readable statistics. + /// + public string FormatStatistics() + { + StringBuilder result = new StringBuilder(1024); + + string title = "Opportunistic Intern"; + + if (_internCallCountsByString != null) + { + result.AppendLine(string.Format("\n{0}{1}{0}", new string('=', 41 - (title.Length / 2)), title)); + result.AppendLine(string.Format("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Hits", _regularInternHits, "hits")); + result.AppendLine(string.Format("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Misses", _regularInternMisses, "misses")); + result.AppendLine(string.Format("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Strings*", _internEliminatedStrings, "strings")); + result.AppendLine(string.Format("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Chars", _internEliminatedChars, "chars")); + result.AppendLine(string.Format("||{0,50}|{1,20:N0}|{2,8}|", "Estimated Eliminated Bytes", _internEliminatedChars * 2, "bytes")); + result.AppendLine("Elimination assumes that strings provided were unique objects."); + result.AppendLine("|---------------------------------------------------------------------------------|"); + + IEnumerable topInternedStrings = + _internCallCountsByString + .OrderByDescending(kv => kv.Value * kv.Key.Length) + .Where(kv => kv.Value > 1) + .Take(15) + .Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars)\n{0}", kv.Key, kv.Value, kv.Key.Length)); + + result.AppendLine(string.Format("##########Top Top Interned Strings: \n{0} ", string.Join("\n==============\n", topInternedStrings.ToArray()))); + result.AppendLine(); + + WeakStringCache.DebugInfo debugInfo = _weakStringCache.GetDebugInfo(); + 
result.AppendLine("WeakStringCache statistics:"); + result.AppendLine(string.Format("String count live/collected/total = {0}/{1}/{2}", debugInfo.LiveStringCount, debugInfo.CollectedStringCount, debugInfo.LiveStringCount + debugInfo.CollectedStringCount)); + } + else + { + result.Append(title); + result.AppendLine(" - EnableStatisticsGathering() has not been called"); + } + + return result.ToString(); + } + + /// + /// Releases all strings from the underlying intern table. + /// + public void Dispose() + { + _weakStringCache.Dispose(); + } + } +} diff --git a/src/Tasks/ListOperators/FindUnderPath.cs b/src/Tasks/ListOperators/FindUnderPath.cs index 876f06a6c9a..46acb04aad8 100644 --- a/src/Tasks/ListOperators/FindUnderPath.cs +++ b/src/Tasks/ListOperators/FindUnderPath.cs @@ -6,6 +6,8 @@ using Microsoft.Build.Framework; using Microsoft.Build.Shared; +using Microsoft.NET.StringTools; + namespace Microsoft.Build.Tasks { /// @@ -54,7 +56,7 @@ public override bool Execute() try { conePath = - OpportunisticIntern.InternStringIfPossible( + Strings.WeakIntern( System.IO.Path.GetFullPath(FileUtilities.FixFilePath(Path.ItemSpec))); conePath = FileUtilities.EnsureTrailingSlash(conePath); } @@ -75,7 +77,7 @@ public override bool Execute() try { fullPath = - OpportunisticIntern.InternStringIfPossible( + Strings.WeakIntern( System.IO.Path.GetFullPath(FileUtilities.FixFilePath(item.ItemSpec))); } catch (Exception e) when (ExceptionHandling.IsIoRelatedException(e)) diff --git a/src/Tasks/Microsoft.Build.Tasks.csproj b/src/Tasks/Microsoft.Build.Tasks.csproj index f70cb0e0dc6..19ec24490b0 100644 --- a/src/Tasks/Microsoft.Build.Tasks.csproj +++ b/src/Tasks/Microsoft.Build.Tasks.csproj @@ -82,18 +82,6 @@ NGen.cs - - IInternable.cs - - - WeakStringCache.cs - - - WeakStringCache.Concurrent.cs - - - OpportunisticIntern.cs - PropertyParser.cs True @@ -983,6 +971,7 @@ + diff --git a/src/UnitTests.Shared/Microsoft.Build.UnitTests.Shared.csproj 
b/src/UnitTests.Shared/Microsoft.Build.UnitTests.Shared.csproj index 30db3703d9b..209e21d4d1e 100644 --- a/src/UnitTests.Shared/Microsoft.Build.UnitTests.Shared.csproj +++ b/src/UnitTests.Shared/Microsoft.Build.UnitTests.Shared.csproj @@ -63,18 +63,6 @@ NativeMethodsShared.cs - - IInternable.cs - - - WeakStringCache.cs - - - WeakStringCache.Concurrent.cs - - - OpportunisticIntern.cs - ResourceUtilities.cs diff --git a/src/Utilities/Microsoft.Build.Utilities.csproj b/src/Utilities/Microsoft.Build.Utilities.csproj index 840d2bcc37e..2fdd06afdd6 100644 --- a/src/Utilities/Microsoft.Build.Utilities.csproj +++ b/src/Utilities/Microsoft.Build.Utilities.csproj @@ -19,7 +19,8 @@ - + + @@ -124,18 +125,6 @@ Shared\InprocTrackingNativeMethods.cs - - Shared\IInternable.cs - - - Shared\WeakStringCache.cs - - - Shared\WeakStringCache.Concurrent.cs - - - Shared\OpportunisticIntern.cs - Shared\ReadOnlyEmptyCollection.cs