From 99f5b6e2fcd1b6dc27dfb3c5e654f64641017e75 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:48:00 +0100 Subject: [PATCH 1/6] Remove unused parameter from GetFileListUnescaped --- src/Build/Utilities/EngineFileUtilities.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Build/Utilities/EngineFileUtilities.cs b/src/Build/Utilities/EngineFileUtilities.cs index 509be5de918..e6d3a59f7e0 100644 --- a/src/Build/Utilities/EngineFileUtilities.cs +++ b/src/Build/Utilities/EngineFileUtilities.cs @@ -53,17 +53,15 @@ public EngineFileUtilities(FileMatcher fileMatcher) /// /// The directory to evaluate, escaped. /// The filespec to evaluate, escaped. - /// Whether to force file glob expansion when eager expansion is turned off /// Array of file paths, unescaped. internal string[] GetFileListUnescaped ( string directoryEscaped, - string filespecEscaped, - bool forceEvaluate = false + string filespecEscaped ) { - return GetFileList(directoryEscaped, filespecEscaped, false /* returnEscaped */, forceEvaluate); + return GetFileList(directoryEscaped, filespecEscaped, returnEscaped: false, forceEvaluateWildCards: false); } /// @@ -89,7 +87,7 @@ internal string[] GetFileListEscaped bool forceEvaluate = false ) { - return GetFileList(directoryEscaped, filespecEscaped, true /* returnEscaped */, forceEvaluate, excludeSpecsEscaped); + return GetFileList(directoryEscaped, filespecEscaped, returnEscaped: true, forceEvaluate, excludeSpecsEscaped); } internal static bool FilespecHasWildcards(string filespecEscaped) From 4cdd49ea227602f2733c467625f6738f987ef3a5 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:49:12 +0100 Subject: [PATCH 2/6] Don't create empty excludeSpecsEscaped lists --- src/Build/Utilities/EngineFileUtilities.cs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Build/Utilities/EngineFileUtilities.cs b/src/Build/Utilities/EngineFileUtilities.cs index e6d3a59f7e0..e6ca9b5919c 100644 --- a/src/Build/Utilities/EngineFileUtilities.cs +++ b/src/Build/Utilities/EngineFileUtilities.cs @@ -141,11 +141,6 @@ private string[] GetFileList { ErrorUtilities.VerifyThrowInternalLength(filespecEscaped, nameof(filespecEscaped)); - if (excludeSpecsEscaped == null) - { - excludeSpecsEscaped = Enumerable.Empty(); - } - string[] fileList; if (!FilespecHasWildcards(filespecEscaped) || @@ -164,7 +159,7 @@ private string[] GetFileList // Unescape before handing it to the filesystem. var directoryUnescaped = EscapingUtilities.UnescapeAll(directoryEscaped); var filespecUnescaped = EscapingUtilities.UnescapeAll(filespecEscaped); - var excludeSpecsUnescaped = excludeSpecsEscaped.Where(IsValidExclude).Select(i => EscapingUtilities.UnescapeAll(i)).ToList(); + var excludeSpecsUnescaped = excludeSpecsEscaped?.Where(IsValidExclude).Select(i => EscapingUtilities.UnescapeAll(i)).ToList(); // Get the list of actual files which match the filespec. Put // the list into a string array. If the filespec started out From 372508113098cc1e8564ffb98a8d777e3b481913 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:50:16 +0100 Subject: [PATCH 3/6] Optimize the order of wildcard checks in BuildItemFragments --- src/Build/Evaluation/ItemSpec.cs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/Build/Evaluation/ItemSpec.cs b/src/Build/Evaluation/ItemSpec.cs index a69377a772b..7a1cb4db89b 100644 --- a/src/Build/Evaluation/ItemSpec.cs +++ b/src/Build/Evaluation/ItemSpec.cs @@ -211,31 +211,28 @@ private List BuildItemFragments(IElementLocation itemSpecLocat // The expression is not of the form "@(X)". Treat as string // Code corresponds to EngineFileUtilities.GetFileList - var containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(splitEscaped); - var containsRealWildcards = FileMatcher.HasWildcards(splitEscaped); + if (!FileMatcher.HasWildcards(splitEscaped)) + { + // No real wildcards means we just return the original string. Don't even bother + // escaping ... it should already be escaped appropriately since it came directly + // from the project file - // '*' is an illegal character to have in a filename. - // todo: file-system assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781 - if (containsEscapedWildcards && containsRealWildcards) + fragments.Add(new ValueFragment(splitEscaped, projectDirectory)); + } + else if (EscapingUtilities.ContainsEscapedWildcards(splitEscaped)) { + // '*' is an illegal character to have in a filename. + // todo: file-system assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781 // Just return the original string. fragments.Add(new ValueFragment(splitEscaped, projectDirectory)); } - else if (!containsEscapedWildcards && containsRealWildcards) + else { // Unescape before handing it to the filesystem. var filespecUnescaped = EscapingUtilities.UnescapeAll(splitEscaped); fragments.Add(new GlobFragment(filespecUnescaped, projectDirectory)); } - else - { - // No real wildcards means we just return the original string. Don't even bother - // escaping ... it should already be escaped appropriately since it came directly - // from the project file - - fragments.Add(new ValueFragment(splitEscaped, projectDirectory)); - } } } } From 944c66acbc3d4ad00a2252adc8e65003aee2f26f Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:50:40 +0100 Subject: [PATCH 4/6] Optimize the order of wildcard checks in FilespecHasWildcards --- src/Build/Utilities/EngineFileUtilities.cs | 24 +++++++--------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/Build/Utilities/EngineFileUtilities.cs b/src/Build/Utilities/EngineFileUtilities.cs index e6ca9b5919c..02b46c31efa 100644 --- a/src/Build/Utilities/EngineFileUtilities.cs +++ b/src/Build/Utilities/EngineFileUtilities.cs @@ -92,26 +92,16 @@ internal string[] GetFileListEscaped internal static bool FilespecHasWildcards(string filespecEscaped) { - bool containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(filespecEscaped); - bool containsRealWildcards = FileMatcher.HasWildcards(filespecEscaped); - - if (containsEscapedWildcards && containsRealWildcards) - { - // Umm, this makes no sense. The item's Include has both escaped wildcards and - // real wildcards. What does he want us to do? Go to the file system and find - // files that literally have '*' in their filename? Well, that's not going to - // happen because '*' is an illegal character to have in a filename. - - return false; - } - else if (!containsEscapedWildcards && containsRealWildcards) - { - return true; - } - else + if (!FileMatcher.HasWildcards(filespecEscaped)) { return false; } + + // If the item's Include has both escaped wildcards and real wildcards, then it's + // not clear what they are asking us to do. Go to the file system and find + // files that literally have '*' in their filename? Well, that's not going to + // happen because '*' is an illegal character to have in a filename. + return !EscapingUtilities.ContainsEscapedWildcards(filespecEscaped); } /// From 554de4b26bb9fb67416dffe64d995bc754399915 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:52:02 +0100 Subject: [PATCH 5/6] Optimize hex number parsing in UnescapeAll --- src/Shared/EscapingUtilities.cs | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/Shared/EscapingUtilities.cs b/src/Shared/EscapingUtilities.cs index 2f0082459b3..9c70c0bb381 100644 --- a/src/Shared/EscapingUtilities.cs +++ b/src/Shared/EscapingUtilities.cs @@ -26,11 +26,25 @@ static internal class EscapingUtilities /// private static Dictionary s_unescapedToEscapedStrings = new Dictionary(StringComparer.Ordinal); - private static bool IsHexDigit(char character) + private static bool TryDecodeHexDigit(char character, out int value) { - return ((character >= '0') && (character <= '9')) - || ((character >= 'A') && (character <= 'F')) - || ((character >= 'a') && (character <= 'f')); + if (character >= '0' && character <= '9') + { + value = character - '0'; + return true; + } + if (character >= 'A' && character <= 'F') + { + value = character - 'A' + 10; + return true; + } + if (character >= 'a' && character <= 'f') + { + value = character - 'a' + 10; + return true; + } + value = default; + return false; } /// @@ -85,8 +99,8 @@ internal static string UnescapeAll(string escapedString, bool trim = false) // for us to even consider doing anything with this. if ( (indexOfPercent <= (escapedStringLength - 3)) && - IsHexDigit(escapedString[indexOfPercent + 1]) && - IsHexDigit(escapedString[indexOfPercent + 2]) + TryDecodeHexDigit(escapedString[indexOfPercent + 1], out int digit1) && + TryDecodeHexDigit(escapedString[indexOfPercent + 2], out int digit2) ) { // First copy all the characters up to the current percent sign into @@ -94,9 +108,7 @@ internal static string UnescapeAll(string escapedString, bool trim = false) unescapedString.Append(escapedString, currentPosition, indexOfPercent - currentPosition); // Convert the %XX to an actual real character. - string hexString = escapedString.Substring(indexOfPercent + 1, 2); - char unescapedCharacter = (char)int.Parse(hexString, System.Globalization.NumberStyles.HexNumber, - CultureInfo.InvariantCulture); + char unescapedCharacter = (char)((digit1 << 4) + digit2); // if the unescaped character is not on the exception list, append it unescapedString.Append(unescapedCharacter); From 5f4f7c2b36b9d03f6c8955cfe36343ef1f5b9e3c Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 4 Mar 2021 14:53:11 +0100 Subject: [PATCH 6/6] Optimize ContainsEscapedWildcards to do the work in one pass --- src/Shared/EscapingUtilities.cs | 38 ++++++++++--------- .../UnitTests/EscapingUtilities_Tests.cs | 7 +++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/Shared/EscapingUtilities.cs b/src/Shared/EscapingUtilities.cs index 9c70c0bb381..309e39a2988 100644 --- a/src/Shared/EscapingUtilities.cs +++ b/src/Shared/EscapingUtilities.cs @@ -226,28 +226,30 @@ string unescapedString /// /// /// - internal static bool ContainsEscapedWildcards - ( - string escapedString - ) + internal static bool ContainsEscapedWildcards(string escapedString) { - if (-1 != escapedString.IndexOf('%')) + if (escapedString.Length < 3) { - // It has a '%' sign. We have promise. - if ( - (-1 != escapedString.IndexOf("%2", StringComparison.Ordinal)) || - (-1 != escapedString.IndexOf("%3", StringComparison.Ordinal)) - ) + return false; + } + // Look for the first %. We know that it has to be followed by at least two more characters so we subtract 2 + // from the length to search. + int index = escapedString.IndexOf('%', 0, escapedString.Length - 2); + while (index != -1) + { + if (escapedString[index + 1] == '2' && (escapedString[index + 2] == 'a' || escapedString[index + 2] == 'A')) + { + // %2a or %2A + return true; + } + if (escapedString[index + 1] == '3' && (escapedString[index + 2] == 'f' || escapedString[index + 2] == 'F')) { - // It has either a '%2' or a '%3'. This is looking very promising. - return - - (-1 != escapedString.IndexOf("%2a", StringComparison.Ordinal)) || - (-1 != escapedString.IndexOf("%2A", StringComparison.Ordinal)) || - (-1 != escapedString.IndexOf("%3f", StringComparison.Ordinal)) || - (-1 != escapedString.IndexOf("%3F", StringComparison.Ordinal)) - ; + // %3f or %3F + return true; } + // Continue searching for % starting at (index + 1). We know that it has to be followed by at least two + // more characters so we subtract 2 from the length of the substring to search. + index = escapedString.IndexOf('%', index + 1, escapedString.Length - (index + 1) - 2); } return false; } diff --git a/src/Shared/UnitTests/EscapingUtilities_Tests.cs b/src/Shared/UnitTests/EscapingUtilities_Tests.cs index 627de15dff6..b02c378e2eb 100644 --- a/src/Shared/UnitTests/EscapingUtilities_Tests.cs +++ b/src/Shared/UnitTests/EscapingUtilities_Tests.cs @@ -75,13 +75,18 @@ public void EscapeUnescape() public void ContainsEscapedWildcards() { Assert.False(EscapingUtilities.ContainsEscapedWildcards("NoStarOrQMark")); + Assert.False(EscapingUtilities.ContainsEscapedWildcards("%")); + Assert.False(EscapingUtilities.ContainsEscapedWildcards("%%")); + Assert.False(EscapingUtilities.ContainsEscapedWildcards("%2")); Assert.False(EscapingUtilities.ContainsEscapedWildcards("%4")); - Assert.False(EscapingUtilities.ContainsEscapedWildcards("%3B")); + Assert.False(EscapingUtilities.ContainsEscapedWildcards("%3A")); Assert.False(EscapingUtilities.ContainsEscapedWildcards("%2B")); Assert.True(EscapingUtilities.ContainsEscapedWildcards("%2a")); Assert.True(EscapingUtilities.ContainsEscapedWildcards("%2A")); Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3F")); Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3f")); + Assert.True(EscapingUtilities.ContainsEscapedWildcards("%%3f")); + Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3%3f")); } } }