Skip to content

Commit

Permalink
Optimize Match-on-metadata (#6035)
Browse files Browse the repository at this point in the history
Fixes [AB#1261123](https://devdiv.visualstudio.com/0bdbc590-a062-4c3f-b0f6-9383f67865ee/_workitems/edit/1261123) (feedback ticket)

### Context
Checking metadata for matches was taking (#fragments in item) * (#fragments in item to remove) * (#metadata). For many users, this was extremely slow.

### Changes Made
Put fragments into a custom MetadataSet object before checking for a match. This takes it from O(n * r * m) to O(m * (n + r)). Since m is normally small, this is a substantial improvement.

### Testing
Time on a single operation went from ~180 seconds to ~150 ms for the penultimate version. It also passes @cdmihai's extensive unit tests.*

### Notes
Is there any reason to keep the error for referencing multiple items? As a user, I would expect not-match-on-metadata to ignore metadata and match-on-metadata to ignore the item.
Is it ok to rely on IEnumerable returning element in a consistent order?
Should we keep returning no-match if all the metadata are empty on both items? It makes more sense to me to have that be a match, and it would simplify the code slightly. If it makes more sense to others, now would be the time to do it, since the code isn't that old.

\* Except one on paths. Not sure why that's failing, since it's passing in VS, just not from the command line. Will look into it more.
  • Loading branch information
Forgind committed Feb 24, 2021
1 parent 56626eb commit 30bbc5a
Show file tree
Hide file tree
Showing 20 changed files with 212 additions and 151 deletions.
22 changes: 13 additions & 9 deletions src/Build.OM.UnitTests/Definition/ProjectItem_Tests.cs
Expand Up @@ -2231,7 +2231,7 @@ public void RemoveWithItemReferenceOnFilePathMatchingMetadata()
<I1 Include='c1' M1='foo/bar.vb' M2='y'/>
<I1 Include='d1' M1='foo\foo\foo' M2='b'/>
<I1 Include='e1' M1='a/b/../c/./d' M2='1'/>
<I1 Include='f1' M1='{ ObjectModelHelpers.TempProjectDir }\b\c' M2='6'/>
<I1 Include='f1' M1='{ Environment.CurrentDirectory }\b\c' M2='6'/>
<I2 Include='a2' M1='FOO.TXT' m2='c'/>
<I2 Include='b2' M1='foo/bar.txt' m2='x'/>
Expand Down Expand Up @@ -2403,7 +2403,7 @@ public void KeepWithItemReferenceOnNonmatchingMetadata()
}

[Fact]
public void FailWithMatchingMultipleMetadata()
public void RemoveMatchingMultipleMetadata()
{
string content = ObjectModelHelpers.FormatProjectContentsWithItemGroupFragment(
@"<I1 Include='a1' M1='1' M2='a'/>
Expand All @@ -2418,12 +2418,16 @@ public void FailWithMatchingMultipleMetadata()
<I2 Remove='@(I1)' MatchOnMetadata='M1;M2'/>");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
Project project = ObjectModelHelpers.CreateInMemoryProject(content);
IEnumerable<ProjectItem> items = project.ItemsIgnoringCondition.Where(i => i.ItemType.Equals("I2"));
items.Count().ShouldBe(3);
items.ElementAt(0).EvaluatedInclude.ShouldBe("a2");
items.ElementAt(1).EvaluatedInclude.ShouldBe("c2");
items.ElementAt(2).EvaluatedInclude.ShouldBe("d2");
}

[Fact]
public void FailWithMultipleItemReferenceOnMatchingMetadata()
public void RemoveMultipleItemReferenceOnMatchingMetadata()
{
string content = ObjectModelHelpers.FormatProjectContentsWithItemGroupFragment(
@"<I1 Include='a1' M1='1' M2='a'/>
Expand All @@ -2443,8 +2447,9 @@ public void FailWithMultipleItemReferenceOnMatchingMetadata()
<I3 Remove='@(I1);@(I2)' MatchOnMetadata='M1' />");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
Project project = ObjectModelHelpers.CreateInMemoryProject(content);
IEnumerable<ProjectItem> items = project.ItemsIgnoringCondition.Where(i => i.ItemType.Equals("I3"));
items.ShouldBeEmpty();
}

[Fact]
Expand All @@ -2462,9 +2467,8 @@ public void FailWithMetadataItemReferenceOnMatchingMetadata()
<I2 Include='d2' M1='y' m2='d'/>
<I2 Remove='%(I1.M1)' MatchOnMetadata='M1' />");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToReferencedItems");
}

[Fact]
Expand Down
19 changes: 12 additions & 7 deletions src/Build.UnitTests/BackEnd/IntrinsicTask_Tests.cs
Expand Up @@ -1989,7 +1989,7 @@ public void KeepWithItemReferenceOnNonmatchingMetadata()
}

[Fact]
public void FailWithMatchingMultipleMetadata()
public void RemoveWithMatchingMultipleMetadata()
{
string content = ObjectModelHelpers.CleanupFileContents(
@"<Project ToolsVersion='msbuilddefaulttoolsversion' xmlns='msbuildnamespace'>
Expand All @@ -2010,12 +2010,16 @@ public void FailWithMatchingMultipleMetadata()
</Target></Project>");
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
ExecuteTask(task, lookup);
ICollection<ProjectItemInstance> items = lookup.GetItems("I2");
items.Count().ShouldBe(3);
items.ElementAt(0).EvaluatedInclude.ShouldBe("a2");
items.ElementAt(1).EvaluatedInclude.ShouldBe("c2");
items.ElementAt(2).EvaluatedInclude.ShouldBe("d2");
}

[Fact]
public void FailWithMultipleItemReferenceOnMatchingMetadata()
public void RemoveWithMultipleItemReferenceOnMatchingMetadata()
{
string content = ObjectModelHelpers.CleanupFileContents(
@"<Project ToolsVersion='msbuilddefaulttoolsversion' xmlns='msbuildnamespace'>
Expand All @@ -2041,8 +2045,9 @@ public void FailWithMultipleItemReferenceOnMatchingMetadata()
</Target></Project>");
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
ExecuteTask(task, lookup);
ICollection<ProjectItemInstance> items = lookup.GetItems("I3");
items.ShouldBeEmpty();
}

[Fact]
Expand All @@ -2068,7 +2073,7 @@ public void FailWithMetadataItemReferenceOnMatchingMetadata()
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToReferencedItems");
}

[Fact]
Expand Down
Expand Up @@ -249,7 +249,7 @@ private void ExecuteRemove(ProjectItemGroupTaskItemInstance child, ItemBucket bu
}
else
{
itemsToRemove = FindItemsUsingMatchOnMetadata(group, child, bucket, matchOnMetadata, matchingOptions);
itemsToRemove = FindItemsMatchingMetadataSpecification(group, child, bucket.Expander, matchOnMetadata, matchingOptions);
}

if (itemsToRemove != null)
Expand All @@ -268,29 +268,6 @@ private void ExecuteRemove(ProjectItemGroupTaskItemInstance child, ItemBucket bu
}
}

private List<ProjectItemInstance> FindItemsUsingMatchOnMetadata(
ICollection<ProjectItemInstance> items,
ProjectItemGroupTaskItemInstance child,
ItemBucket bucket,
HashSet<string> matchOnMetadata,
MatchOnMetadataOptions options)
{
ErrorUtilities.VerifyThrowArgumentNull(matchOnMetadata, nameof(matchOnMetadata));

var itemSpec = new ItemSpec<ProjectPropertyInstance, ProjectItemInstance>(child.Remove, bucket.Expander, child.RemoveLocation, Project.Directory, true);

ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
itemSpec.Fragments.Count == 1
&& itemSpec.Fragments.First() is ItemSpec<ProjectPropertyInstance, ProjectItemInstance>.ItemExpressionFragment
&& matchOnMetadata.Count == 1,
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem",
child.RemoveLocation,
child.Remove);

return items.Where(item => itemSpec.MatchesItemOnMetadata(item, matchOnMetadata, options)).ToList();
}

/// <summary>
/// Modifies items in the world - specifically, changes their metadata. Changes to items that are part of the project manifest are backed up, so
/// they can be reverted when the project is reset after the end of the build.
Expand Down Expand Up @@ -597,6 +574,24 @@ private List<ProjectItemInstance> FindItemsMatchingSpecification
return itemsRemoved;
}

private List<ProjectItemInstance> FindItemsMatchingMetadataSpecification(
ICollection<ProjectItemInstance> group,
ProjectItemGroupTaskItemInstance child,
Expander<ProjectPropertyInstance, ProjectItemInstance> expander,
HashSet<string> matchOnMetadata,
MatchOnMetadataOptions matchingOptions)
{
ItemSpec<ProjectPropertyInstance, ProjectItemInstance> itemSpec = new ItemSpec<ProjectPropertyInstance, ProjectItemInstance>(child.Remove, expander, child.RemoveLocation, Project.Directory, true);
ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
itemSpec.Fragments.All(f => f is ItemSpec<ProjectPropertyInstance, ProjectItemInstance>.ItemExpressionFragment),
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToReferencedItems",
child.RemoveLocation,
child.Remove);
MetadataTrie<ProjectPropertyInstance, ProjectItemInstance> metadataSet = new MetadataTrie<ProjectPropertyInstance, ProjectItemInstance>(matchingOptions, matchOnMetadata, itemSpec);
return group.Where(item => metadataSet.Contains(matchOnMetadata.Select(m => item.GetMetadataValue(m)))).ToList();
}

/// <summary>
/// This class is used during ItemGroup intrinsic tasks to resolve metadata references. It consists of three tables:
/// 1. The metadata added during evaluation.
Expand Down
153 changes: 107 additions & 46 deletions src/Build/Evaluation/ItemSpec.cs
Expand Up @@ -85,24 +85,6 @@ public override bool IsMatch(string itemToMatch)
return ReferencedItems.Any(v => v.ItemAsValueFragment.IsMatch(itemToMatch));
}

public override bool IsMatchOnMetadata(IItem item, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
return ReferencedItems.Any(referencedItem =>
metadata.All(m => !item.GetMetadataValue(m).Equals(string.Empty) && MetadataComparer(options, item.GetMetadataValue(m), referencedItem.Item.GetMetadataValue(m))));
}

private bool MetadataComparer(MatchOnMetadataOptions options, string itemMetadata, string referencedItemMetadata)
{
if (options.Equals(MatchOnMetadataOptions.PathLike))
{
return FileUtilities.ComparePathsNoThrow(itemMetadata, referencedItemMetadata, ProjectDirectory);
}
else
{
return String.Equals(itemMetadata, referencedItemMetadata, options.Equals(MatchOnMetadataOptions.CaseInsensitive) ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
}
}

public override IMSBuildGlob ToMSBuildGlob()
{
return MsBuildGlob;
Expand Down Expand Up @@ -310,26 +292,6 @@ public bool MatchesItem(I item)
return false;
}

/// <summary>
/// Return true if any of the given <paramref name="metadata" /> matches the metadata on <paramref name="item" />
/// </summary>
/// <param name="item">The item to attempt to find a match for based on matching metadata</param>
/// <param name="metadata">Names of metadata to look for matches for</param>
/// <param name="options">metadata option matching</param>
/// <returns></returns>
public bool MatchesItemOnMetadata(IItem item, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
foreach (var fragment in Fragments)
{
if (fragment.IsMatchOnMetadata(item, metadata, options))
{
return true;
}
}

return false;
}

/// <summary>
/// Return the fragments that match against the given <paramref name="itemToMatch" />
/// </summary>
Expand Down Expand Up @@ -456,14 +418,6 @@ public virtual bool IsMatch(string itemToMatch)
return FileMatcher.IsMatch(itemToMatch);
}

/// <summary>
/// Returns true if <paramref name="itemToMatch" /> matches any ReferencedItems based on <paramref name="metadata" /> and <paramref name="options" />.
/// </summary>
public virtual bool IsMatchOnMetadata(IItem itemToMatch, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
return false;
}

public virtual IMSBuildGlob ToMSBuildGlob()
{
return MsBuildGlob;
Expand Down Expand Up @@ -504,4 +458,111 @@ public GlobFragment(string textFragment, string projectDirectory)
&& TextFragment[2] == '*'
&& FileUtilities.IsAnySlash(TextFragment[3]);
}

/// <summary>
/// A Trie representing the sets of values of specified metadata taken on by the referenced items.
/// A single flat list or set of metadata values would not work in this case because we are matching
/// on multiple metadata. If one item specifies NotTargetFramework to be net46 and TargetFramework to
/// be netcoreapp3.1, we wouldn't want to match that to an item with TargetFramework 46 and
/// NotTargetFramework netcoreapp3.1.
///
/// Implementing this as a list of sets where each metadatum key has its own set also would not work
/// because different items could match on different metadata, and we want to check to see if any
/// single item matches on all the metadata. As an example, consider this scenario:
/// Item Baby has metadata GoodAt="eating" BadAt="talking" OkAt="sleeping"
/// Item Child has metadata GoodAt="sleeping" BadAt="eating" OkAt="talking"
/// Item Adolescent has metadata GoodAt="talking" BadAt="sleeping" OkAt="eating"
/// Specifying these three metadata:
/// Item Forgind with metadata GoodAt="sleeping" BadAt="talking" OkAt="eating"
/// should match none of them because Forgind doesn't match all three metadata of any of the items.
/// With a list of sets, Forgind would match Baby on BadAt, Child on GoodAt, and Adolescent on OkAt,
/// and Forgind would be erroneously removed.
///
/// With a Trie as below, Items specify paths in the tree, so going to any child node eliminates all
/// items that don't share that metadatum. This ensures the match is proper.
///
/// Todo: Tries naturally can have different shapes depending on in what order the metadata are considered.
/// Specifically, if all the items share a single metadata value for the one metadatum and have different
/// values for a second metadatum, it will have only one node more than the number of items if the first
/// metadatum is considered first. If the metadatum is considered first, it will have twice that number.
/// Users can theoretically specify the order in which metadata should be considered by reordering them
/// on the line invoking this, but that is extremely nonobvious from a user's perspective.
/// It would be nice to detect poorly-ordered metadata and account for it to avoid making more nodes than
/// necessary. This would need to order if appropriately both in creating the MetadataTrie and in using it,
/// so it could best be done as a preprocessing step. For now, wait to find out if it's necessary (users'
/// computers run out of memory) before trying to implement it.
/// </summary>
/// <typeparam name="P">Property type</typeparam>
/// <typeparam name="I">Item type</typeparam>
internal sealed class MetadataTrie<P, I> where P : class, IProperty where I : class, IItem, IMetadataTable
{
private readonly Dictionary<string, MetadataTrie<P, I>> _children;
private readonly Func<string, string> _normalize;

internal MetadataTrie(MatchOnMetadataOptions options, IEnumerable<string> metadata, ItemSpec<P, I> itemSpec)
{
StringComparer comparer = options == MatchOnMetadataOptions.CaseSensitive ? StringComparer.Ordinal :
options == MatchOnMetadataOptions.CaseInsensitive || FileUtilities.PathComparison == StringComparison.OrdinalIgnoreCase ? StringComparer.OrdinalIgnoreCase :
StringComparer.Ordinal;
_children = new Dictionary<string, MetadataTrie<P, I>>(comparer);
_normalize = options == MatchOnMetadataOptions.PathLike ? (Func<string, string>) (p => FileUtilities.NormalizePathForComparisonNoThrow(p, Environment.CurrentDirectory)) : p => p;
foreach (ItemSpec<P, I>.ItemExpressionFragment frag in itemSpec.Fragments)
{
foreach (ItemSpec<P, I>.ReferencedItem referencedItem in frag.ReferencedItems)
{
this.Add(metadata.Select(m => referencedItem.Item.GetMetadataValue(m)), comparer);
}
}
}

private MetadataTrie(StringComparer comparer)
{
_children = new Dictionary<string, MetadataTrie<P, I>>(comparer);
}

// Relies on IEnumerable returning the metadata in a reasonable order. Reasonable?
private void Add(IEnumerable<string> metadata, StringComparer comparer)
{
MetadataTrie<P, I> current = this;
foreach (string m in metadata)
{
string normalizedString = _normalize(m);
if (!current._children.TryGetValue(normalizedString, out MetadataTrie<P, I> child))
{
child = new MetadataTrie<P, I>(comparer);
current._children.Add(normalizedString, child);
}
current = child;
}
}

internal bool Contains(IEnumerable<string> metadata)
{
MetadataTrie<P, I> current = this;
foreach (string m in metadata)
{
if (String.IsNullOrEmpty(m))
{
return false;
}
if (!current._children.TryGetValue(_normalize(m), out current))
{
return false;
}
}
return true;
}
}

public enum MatchOnMetadataOptions
{
CaseSensitive,
CaseInsensitive,
PathLike
}

public static class MatchOnMetadataConstants
{
public const MatchOnMetadataOptions MatchOnMetadataOptionsDefaultValue = MatchOnMetadataOptions.CaseSensitive;
}
}

0 comments on commit 30bbc5a

Please sign in to comment.