Skip to content

Commit

Permalink
Optimize glob evaluation - PR #6151
Browse files Browse the repository at this point in the history
Collection of several changes to the globbing logic aiming to make .NET Core project evaluation faster.
- Refactor, simplify, and micro-optimize hot code paths.
- Eliminate exclude patterns early if it's clear that they don't intersect with the include.
- Optimize most common glob patterns to match without regex (building a typical .NET Core app now does not instantiate a single regex for glob matching).
- Cache the results of all filesystem enumeration calls, not just directories.
  • Loading branch information
rokonec committed Mar 3, 2021
2 parents cbca164 + c5afa7c commit 202f872
Show file tree
Hide file tree
Showing 7 changed files with 382 additions and 222 deletions.
Expand Up @@ -471,6 +471,14 @@ public void ContextDisambiguatesSameRelativeGlobsPointingOutsideDifferentProject
[MemberData(nameof(ContextDisambiguatesRelativeGlobsData))]
public void ContextDisambiguatesAFullyQualifiedGlobPointingInAnotherRelativeGlobsCone(EvaluationContext.SharingPolicy policy, string[][] expectedGlobExpansions)
{
if (policy == EvaluationContext.SharingPolicy.Shared)
{
// This test case has a dependency on our glob expansion caching policy. If the evaluation context is reused
// between evaluations and files are added to the filesystem between evaluations, the cache may be returning
// stale results. Run only the Isolated variant.
return;
}

var project1Directory = _env.DefaultTestDirectory.CreateDirectory("Project1");
var project1GlobDirectory = project1Directory.CreateDirectory("Glob").CreateDirectory("1").Path;

Expand Down
6 changes: 3 additions & 3 deletions src/Build/Evaluation/Context/EvaluationContext.cs
Expand Up @@ -3,7 +3,7 @@

using System;
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Collections.Generic;
using System.Threading;
using Microsoft.Build.BackEnd.SdkResolution;
using Microsoft.Build.FileSystem;
Expand Down Expand Up @@ -48,7 +48,7 @@ public enum SharingPolicy
/// <summary>
/// Key to file entry list. Example usages: cache glob expansion and intermediary directory expansions during glob expansion.
/// </summary>
private ConcurrentDictionary<string, ImmutableArray<string>> FileEntryExpansionCache { get; }
private ConcurrentDictionary<string, IReadOnlyList<string>> FileEntryExpansionCache { get; }

private EvaluationContext(SharingPolicy policy, IFileSystem fileSystem)
{
Expand All @@ -61,7 +61,7 @@ private EvaluationContext(SharingPolicy policy, IFileSystem fileSystem)
Policy = policy;

SdkResolverService = new CachingSdkResolverService();
FileEntryExpansionCache = new ConcurrentDictionary<string, ImmutableArray<string>>();
FileEntryExpansionCache = new ConcurrentDictionary<string, IReadOnlyList<string>>();
FileSystem = fileSystem ?? new CachingFileSystemWrapper(FileSystems.Default);
EngineFileUtilities = new EngineFileUtilities(new FileMatcher(FileSystem, FileEntryExpansionCache));
}
Expand Down
46 changes: 17 additions & 29 deletions src/Build/Globbing/MSBuildGlob.cs
Expand Up @@ -28,19 +28,17 @@ public class MSBuildGlob : IMSBuildGlob
public string FixedDirectoryPart { get; }
public string WildcardDirectoryPart { get; }
public string FilenamePart { get; }
public string MatchFileExpression { get; }
public bool NeedsRecursion { get; }
public Regex Regex { get; }

/// <summary>
/// Initializes a new <see cref="GlobState"/> by storing each argument in the property of the same name.
/// </summary>
/// <param name="globRoot">Value stored in <see cref="GlobRoot"/>.</param>
/// <param name="fileSpec">Value stored in <see cref="FileSpec"/>.</param>
/// <param name="isLegal">Value stored in <see cref="IsLegal"/>.</param>
/// <param name="fixedDirectoryPart">Value stored in <see cref="FixedDirectoryPart"/>.</param>
/// <param name="wildcardDirectoryPart">Value stored in <see cref="WildcardDirectoryPart"/>.</param>
/// <param name="filenamePart">Value stored in <see cref="FilenamePart"/>.</param>
/// <param name="needsRecursion">Value stored in <see cref="NeedsRecursion"/>.</param>
/// <param name="regex">Value stored in <see cref="Regex"/>.</param>
public GlobState(string globRoot, string fileSpec, bool isLegal, string fixedDirectoryPart, string wildcardDirectoryPart, string filenamePart, bool needsRecursion, Regex regex)
{
    GlobRoot = globRoot;
    FileSpec = fileSpec;
    IsLegal = isLegal;
    FixedDirectoryPart = fixedDirectoryPart;
    WildcardDirectoryPart = wildcardDirectoryPart;
    FilenamePart = filenamePart;
    NeedsRecursion = needsRecursion;
    Regex = regex;
}
Expand Down Expand Up @@ -117,23 +115,20 @@ public MatchInfoResult MatchInfo(string stringToMatch)
{
ErrorUtilities.VerifyThrowArgumentNull(stringToMatch, nameof(stringToMatch));

if (FileUtilities.PathIsInvalid(stringToMatch) ||
!IsLegal)
if (FileUtilities.PathIsInvalid(stringToMatch) || !IsLegal)
{
return MatchInfoResult.Empty;
}

var normalizedInput = NormalizeMatchInput(stringToMatch);
string normalizedInput = NormalizeMatchInput(stringToMatch);

bool isMatch;
string fixedDirectoryPart, wildcardDirectoryPart, filenamePart;
FileMatcher.GetRegexMatchInfo(
normalizedInput,
_state.Value.Regex,
out isMatch,
out fixedDirectoryPart,
out wildcardDirectoryPart,
out filenamePart);
out bool isMatch,
out string fixedDirectoryPart,
out string wildcardDirectoryPart,
out string filenamePart);

return new MatchInfoResult(isMatch, fixedDirectoryPart, wildcardDirectoryPart, filenamePart);
}
Expand All @@ -145,7 +140,7 @@ private string NormalizeMatchInput(string stringToMatch)

// Degenerate case when the string to match is empty.
// Ensure trailing slash because the fixed directory part has a trailing slash.
if (stringToMatch == string.Empty)
if (string.IsNullOrEmpty(stringToMatch))
{
normalizedInput += Path.DirectorySeparatorChar;
}
Expand All @@ -172,7 +167,7 @@ public static MSBuildGlob Parse(string globRoot, string fileSpec)
ErrorUtilities.VerifyThrowArgumentNull(fileSpec, nameof(fileSpec));
ErrorUtilities.VerifyThrowArgumentInvalidPath(globRoot, nameof(globRoot));

if (globRoot == string.Empty)
if (string.IsNullOrEmpty(globRoot))
{
globRoot = Directory.GetCurrentDirectory();
}
Expand All @@ -181,22 +176,13 @@ public static MSBuildGlob Parse(string globRoot, string fileSpec)

var lazyState = new Lazy<GlobState>(() =>
{
string fixedDirectoryPart = null;
string wildcardDirectoryPart = null;
string filenamePart = null;
string matchFileExpression;
bool needsRecursion;
bool isLegalFileSpec;
FileMatcher.Default.GetFileSpecInfo(
fileSpec,
out fixedDirectoryPart,
out wildcardDirectoryPart,
out filenamePart,
out matchFileExpression,
out needsRecursion,
out isLegalFileSpec,
out string fixedDirectoryPart,
out string wildcardDirectoryPart,
out string filenamePart,
out bool needsRecursion,
out bool isLegalFileSpec,
(fixedDirPart, wildcardDirPart, filePart) =>
{
var normalizedFixedPart = NormalizeTheFixedDirectoryPartAgainstTheGlobRoot(fixedDirPart, globRoot);
Expand All @@ -207,6 +193,8 @@ public static MSBuildGlob Parse(string globRoot, string fileSpec)
Regex regex = null;
if (isLegalFileSpec)
{
string matchFileExpression = FileMatcher.RegularExpressionFromFileSpec(fixedDirectoryPart, wildcardDirectoryPart, filenamePart);
lock (s_regexCache)
{
s_regexCache.TryGetValue(matchFileExpression, out regex);
Expand All @@ -226,7 +214,7 @@ public static MSBuildGlob Parse(string globRoot, string fileSpec)
regex ??= newRegex;
}
}
return new GlobState(globRoot, fileSpec, isLegalFileSpec, fixedDirectoryPart, wildcardDirectoryPart, filenamePart, matchFileExpression, needsRecursion, regex);
return new GlobState(globRoot, fileSpec, isLegalFileSpec, fixedDirectoryPart, wildcardDirectoryPart, filenamePart, needsRecursion, regex);
},
true);

Expand Down
60 changes: 47 additions & 13 deletions src/Build/Utilities/FileSpecMatchTester.cs
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using Microsoft.Build.Shared;
using System;
using System.Diagnostics;
using System.IO;
using System.Text.RegularExpressions;
Expand All @@ -12,54 +13,86 @@ namespace Microsoft.Build.Internal
{
private readonly string _currentDirectory;
private readonly string _unescapedFileSpec;
private readonly string _filenamePattern;
private readonly Regex _regex;

/// <summary>
/// Initializes a tester from already-parsed file spec components.
/// </summary>
/// <param name="currentDirectory">Directory that relative paths are combined with; asserted to be non-null in debug builds.</param>
/// <param name="unescapedFileSpec">The unescaped file spec; asserted to be non-empty in debug builds.</param>
/// <param name="filenamePattern">File name pattern used by the regex-free matching path in <see cref="IsMatch"/>, or <c>null</c>.</param>
/// <param name="regex">Regular expression used by the regex matching path in <see cref="IsMatch"/>, or <c>null</c>.</param>
private FileSpecMatcherTester(string currentDirectory, string unescapedFileSpec, string filenamePattern, Regex regex)
{
    Debug.Assert(!string.IsNullOrEmpty(unescapedFileSpec));
    Debug.Assert(currentDirectory != null);

    _currentDirectory = currentDirectory;
    _unescapedFileSpec = unescapedFileSpec;
    _filenamePattern = filenamePattern;
    _regex = regex;
}

/// <summary>
/// Creates a <see cref="FileSpecMatcherTester"/> for <paramref name="fileSpec"/>.
/// </summary>
/// <param name="currentDirectory">Directory passed through to the tester and to the pattern/regex construction.</param>
/// <param name="fileSpec">The (escaped) file spec to parse.</param>
/// <returns>A tester holding the unescaped file spec plus, for wildcard specs, a file name pattern or a regex.</returns>
public static FileSpecMatcherTester Parse(string currentDirectory, string fileSpec)
{
    string unescapedFileSpec = EscapingUtilities.UnescapeAll(fileSpec);

    // Both stay null for specs without wildcards; IsMatch then falls back to a plain path comparison.
    string filenamePattern = null;
    Regex regex = null;

    if (EngineFileUtilities.FilespecHasWildcards(fileSpec))
    {
        CreateRegexOrFilenamePattern(unescapedFileSpec, currentDirectory, out filenamePattern, out regex);
    }

    return new FileSpecMatcherTester(currentDirectory, unescapedFileSpec, filenamePattern, regex);
}

/// <summary>
/// Determines whether <paramref name="fileToMatch"/> is matched by the file spec this tester was parsed from.
/// </summary>
/// <param name="fileToMatch">The file path to test; asserted to be non-empty in debug builds.</param>
/// <returns><c>true</c> if the file matches the file spec; otherwise <c>false</c>.</returns>
public bool IsMatch(string fileToMatch)
{
    Debug.Assert(!string.IsNullOrEmpty(fileToMatch));

    // We do the matching using one of three code paths, depending on the value of _filenamePattern and _regex.
    if (_regex != null)
    {
        string normalizedFileToMatch = FileUtilities.GetFullPathNoThrow(Path.Combine(_currentDirectory, fileToMatch));
        return _regex.IsMatch(normalizedFileToMatch);
    }

    if (_filenamePattern != null)
    {
        // Check file name first as it's more likely to not match.
        string filename = Path.GetFileName(fileToMatch);
        if (!FileMatcher.IsMatch(filename, _filenamePattern))
        {
            return false;
        }

        string fullPath = FileUtilities.GetFullPathNoThrow(Path.Combine(_currentDirectory, fileToMatch));
        if (!fullPath.StartsWith(_currentDirectory, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // A bare prefix test would also accept paths in sibling directories that merely share the prefix
        // (e.g. "C:\proj2\a.cs" for a current directory of "C:\proj"), so additionally require the prefix
        // to end on a directory boundary.
        int rootLength = _currentDirectory.Length;
        if (rootLength == 0 || rootLength == fullPath.Length)
        {
            // Nothing follows the prefix (or there is no prefix), so there is no boundary to verify.
            return true;
        }

        // The ordinal StartsWith above guarantees rootLength < fullPath.Length here.
        char lastRootChar = _currentDirectory[rootLength - 1];
        char nextPathChar = fullPath[rootLength];
        return lastRootChar == Path.DirectorySeparatorChar
            || lastRootChar == Path.AltDirectorySeparatorChar
            || nextPathChar == Path.DirectorySeparatorChar
            || nextPathChar == Path.AltDirectorySeparatorChar;
    }

    // No wildcards were involved: compare the file spec and the file as paths, ignoring case.
    return FileUtilities.ComparePathsNoThrow(_unescapedFileSpec, fileToMatch, _currentDirectory, alwaysIgnoreCase: true);
}

// this method parses the glob and extracts the fixed directory part in order to normalize it and make it absolute
// without this normalization step, strings pointing outside the globbing cone would still match when they shouldn't
// for example, we don't want "**/*.cs" to match "../Shared/Foo.cs"
// todo: glob rooting knowledge partially duplicated with MSBuildGlob.Parse and FileMatcher.ComputeFileEnumerationCacheKey
private static Regex CreateRegex(string unescapedFileSpec, string currentDirectory)
private static void CreateRegexOrFilenamePattern(string unescapedFileSpec, string currentDirectory, out string filenamePattern, out Regex regex)
{
FileMatcher.Default.SplitFileSpec(
unescapedFileSpec,
out string fixedDirPart,
out string wildcardDirectoryPart,
out string filenamePart);
unescapedFileSpec,
out string fixedDirPart,
out string wildcardDirectoryPart,
out string filenamePart);

if (FileUtilities.PathIsInvalid(fixedDirPart))
{
return null;
filenamePattern = null;
regex = null;
return;
}

// Most file specs have "**" as their directory specification so we special case these and make matching faster.
if (string.IsNullOrEmpty(fixedDirPart) && FileMatcher.IsRecursiveDirectoryMatch(wildcardDirectoryPart))
{
filenamePattern = filenamePart;
regex = null;
return;
}

var absoluteFixedDirPart = Path.Combine(currentDirectory, fixedDirPart);
Expand All @@ -74,11 +107,12 @@ private static Regex CreateRegex(string unescapedFileSpec, string currentDirecto

FileMatcher.Default.GetFileSpecInfoWithRegexObject(
recombinedFileSpec,
out Regex regex,
out Regex regexObject,
out bool _,
out bool isLegal);

return isLegal ? regex : null;
filenamePattern = null;
regex = isLegal ? regexObject : null;
}
}
}

0 comments on commit 202f872

Please sign in to comment.