Skip to content

Commit

Permalink
Optimize EngineFileUtilities.GetFileList (#6227)
Browse files Browse the repository at this point in the history
  • Loading branch information
ladipro committed Mar 11, 2021
1 parent be34064 commit e06c993
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 70 deletions.
25 changes: 11 additions & 14 deletions src/Build/Evaluation/ItemSpec.cs
Expand Up @@ -211,31 +211,28 @@ private List<ItemSpecFragment> BuildItemFragments(IElementLocation itemSpecLocat
// The expression is not of the form "@(X)". Treat as string

// Code corresponds to EngineFileUtilities.GetFileList
var containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(splitEscaped);
var containsRealWildcards = FileMatcher.HasWildcards(splitEscaped);
if (!FileMatcher.HasWildcards(splitEscaped))
{
// No real wildcards means we just return the original string. Don't even bother
// escaping ... it should already be escaped appropriately since it came directly
// from the project file

// '*' is an illegal character to have in a filename.
// todo: file-system assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781
if (containsEscapedWildcards && containsRealWildcards)
fragments.Add(new ValueFragment(splitEscaped, projectDirectory));
}
else if (EscapingUtilities.ContainsEscapedWildcards(splitEscaped))
{
// '*' is an illegal character to have in a filename.
// todo: file-system assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781
// Just return the original string.
fragments.Add(new ValueFragment(splitEscaped, projectDirectory));
}
else if (!containsEscapedWildcards && containsRealWildcards)
else
{
// Unescape before handing it to the filesystem.
var filespecUnescaped = EscapingUtilities.UnescapeAll(splitEscaped);

fragments.Add(new GlobFragment(filespecUnescaped, projectDirectory));
}
else
{
// No real wildcards means we just return the original string. Don't even bother
// escaping ... it should already be escaped appropriately since it came directly
// from the project file

fragments.Add(new ValueFragment(splitEscaped, projectDirectory));
}
}
}
}
Expand Down
39 changes: 11 additions & 28 deletions src/Build/Utilities/EngineFileUtilities.cs
Expand Up @@ -53,17 +53,15 @@ public EngineFileUtilities(FileMatcher fileMatcher)
/// </summary>
/// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
/// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
/// <param name="forceEvaluate">Whether to force file glob expansion when eager expansion is turned off</param>
/// <returns>Array of file paths, unescaped.</returns>
internal string[] GetFileListUnescaped
(
string directoryEscaped,
string filespecEscaped,
bool forceEvaluate = false
string filespecEscaped
)

{
return GetFileList(directoryEscaped, filespecEscaped, false /* returnEscaped */, forceEvaluate);
return GetFileList(directoryEscaped, filespecEscaped, returnEscaped: false, forceEvaluateWildCards: false);
}

/// <summary>
Expand All @@ -89,31 +87,21 @@ internal string[] GetFileListEscaped
bool forceEvaluate = false
)
{
return GetFileList(directoryEscaped, filespecEscaped, true /* returnEscaped */, forceEvaluate, excludeSpecsEscaped);
return GetFileList(directoryEscaped, filespecEscaped, returnEscaped: true, forceEvaluate, excludeSpecsEscaped);
}

internal static bool FilespecHasWildcards(string filespecEscaped)
{
bool containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(filespecEscaped);
bool containsRealWildcards = FileMatcher.HasWildcards(filespecEscaped);

if (containsEscapedWildcards && containsRealWildcards)
{
// Umm, this makes no sense. The item's Include has both escaped wildcards and
// real wildcards. What does he want us to do? Go to the file system and find
// files that literally have '*' in their filename? Well, that's not going to
// happen because '*' is an illegal character to have in a filename.

return false;
}
else if (!containsEscapedWildcards && containsRealWildcards)
{
return true;
}
else
if (!FileMatcher.HasWildcards(filespecEscaped))
{
return false;
}

// If the item's Include has both escaped wildcards and real wildcards, then it's
// not clear what they are asking us to do. Go to the file system and find
// files that literally have '*' in their filename? Well, that's not going to
// happen because '*' is an illegal character to have in a filename.
return !EscapingUtilities.ContainsEscapedWildcards(filespecEscaped);
}

/// <summary>
Expand Down Expand Up @@ -143,11 +131,6 @@ private string[] GetFileList
{
ErrorUtilities.VerifyThrowInternalLength(filespecEscaped, nameof(filespecEscaped));

if (excludeSpecsEscaped == null)
{
excludeSpecsEscaped = Enumerable.Empty<string>();
}

string[] fileList;

if (!FilespecHasWildcards(filespecEscaped) ||
Expand All @@ -166,7 +149,7 @@ private string[] GetFileList
// Unescape before handing it to the filesystem.
var directoryUnescaped = EscapingUtilities.UnescapeAll(directoryEscaped);
var filespecUnescaped = EscapingUtilities.UnescapeAll(filespecEscaped);
var excludeSpecsUnescaped = excludeSpecsEscaped.Where(IsValidExclude).Select(i => EscapingUtilities.UnescapeAll(i)).ToList();
var excludeSpecsUnescaped = excludeSpecsEscaped?.Where(IsValidExclude).Select(i => EscapingUtilities.UnescapeAll(i)).ToList();

// Get the list of actual files which match the filespec. Put
// the list into a string array. If the filespec started out
Expand Down
68 changes: 41 additions & 27 deletions src/Shared/EscapingUtilities.cs
Expand Up @@ -26,11 +26,25 @@ static internal class EscapingUtilities
/// </summary>
private static Dictionary<string, string> s_unescapedToEscapedStrings = new Dictionary<string, string>(StringComparer.Ordinal);

private static bool IsHexDigit(char character)
private static bool TryDecodeHexDigit(char character, out int value)
{
return ((character >= '0') && (character <= '9'))
|| ((character >= 'A') && (character <= 'F'))
|| ((character >= 'a') && (character <= 'f'));
if (character >= '0' && character <= '9')
{
value = character - '0';
return true;
}
if (character >= 'A' && character <= 'F')
{
value = character - 'A' + 10;
return true;
}
if (character >= 'a' && character <= 'f')
{
value = character - 'a' + 10;
return true;
}
value = default;
return false;
}

/// <summary>
Expand Down Expand Up @@ -85,18 +99,16 @@ internal static string UnescapeAll(string escapedString, bool trim = false)
// for us to even consider doing anything with this.
if (
(indexOfPercent <= (escapedStringLength - 3)) &&
IsHexDigit(escapedString[indexOfPercent + 1]) &&
IsHexDigit(escapedString[indexOfPercent + 2])
TryDecodeHexDigit(escapedString[indexOfPercent + 1], out int digit1) &&
TryDecodeHexDigit(escapedString[indexOfPercent + 2], out int digit2)
)
{
// First copy all the characters up to the current percent sign into
// the destination.
unescapedString.Append(escapedString, currentPosition, indexOfPercent - currentPosition);

// Convert the %XX to an actual real character.
string hexString = escapedString.Substring(indexOfPercent + 1, 2);
char unescapedCharacter = (char)int.Parse(hexString, System.Globalization.NumberStyles.HexNumber,
CultureInfo.InvariantCulture);
char unescapedCharacter = (char)((digit1 << 4) + digit2);

// if the unescaped character is not on the exception list, append it
unescapedString.Append(unescapedCharacter);
Expand Down Expand Up @@ -214,28 +226,30 @@ string unescapedString
/// </summary>
/// <param name="escapedString"></param>
/// <returns></returns>
internal static bool ContainsEscapedWildcards
(
string escapedString
)
internal static bool ContainsEscapedWildcards(string escapedString)
{
if (-1 != escapedString.IndexOf('%'))
if (escapedString.Length < 3)
{
// It has a '%' sign. We have promise.
if (
(-1 != escapedString.IndexOf("%2", StringComparison.Ordinal)) ||
(-1 != escapedString.IndexOf("%3", StringComparison.Ordinal))
)
return false;
}
// Look for the first %. We know that it has to be followed by at least two more characters so we subtract 2
// from the length to search.
int index = escapedString.IndexOf('%', 0, escapedString.Length - 2);
while (index != -1)
{
if (escapedString[index + 1] == '2' && (escapedString[index + 2] == 'a' || escapedString[index + 2] == 'A'))
{
// %2a or %2A
return true;
}
if (escapedString[index + 1] == '3' && (escapedString[index + 2] == 'f' || escapedString[index + 2] == 'F'))
{
// It has either a '%2' or a '%3'. This is looking very promising.
return

(-1 != escapedString.IndexOf("%2a", StringComparison.Ordinal)) ||
(-1 != escapedString.IndexOf("%2A", StringComparison.Ordinal)) ||
(-1 != escapedString.IndexOf("%3f", StringComparison.Ordinal)) ||
(-1 != escapedString.IndexOf("%3F", StringComparison.Ordinal))
;
// %3f or %3F
return true;
}
// Continue searching for % starting at (index + 1). We know that it has to be followed by at least two
// more characters so we subtract 2 from the length of the substring to search.
index = escapedString.IndexOf('%', index + 1, escapedString.Length - (index + 1) - 2);
}
return false;
}
Expand Down
7 changes: 6 additions & 1 deletion src/Shared/UnitTests/EscapingUtilities_Tests.cs
Expand Up @@ -75,13 +75,18 @@ public void EscapeUnescape()
public void ContainsEscapedWildcards()
{
Assert.False(EscapingUtilities.ContainsEscapedWildcards("NoStarOrQMark"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%%"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%2"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%4"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%3B"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%3A"));
Assert.False(EscapingUtilities.ContainsEscapedWildcards("%2B"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%2a"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%2A"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3F"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3f"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%%3f"));
Assert.True(EscapingUtilities.ContainsEscapedWildcards("%3%3f"));
}
}
}

0 comments on commit e06c993

Please sign in to comment.