Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Match-on-metadata #6035

Merged
merged 17 commits into from Feb 24, 2021
Merged
22 changes: 13 additions & 9 deletions src/Build.OM.UnitTests/Definition/ProjectItem_Tests.cs
Expand Up @@ -2231,7 +2231,7 @@ public void RemoveWithItemReferenceOnFilePathMatchingMetadata()
<I1 Include='c1' M1='foo/bar.vb' M2='y'/>
<I1 Include='d1' M1='foo\foo\foo' M2='b'/>
<I1 Include='e1' M1='a/b/../c/./d' M2='1'/>
<I1 Include='f1' M1='{ ObjectModelHelpers.TempProjectDir }\b\c' M2='6'/>
<I1 Include='f1' M1='{ Environment.CurrentDirectory }\b\c' M2='6'/>

<I2 Include='a2' M1='FOO.TXT' m2='c'/>
<I2 Include='b2' M1='foo/bar.txt' m2='x'/>
Expand Down Expand Up @@ -2403,7 +2403,7 @@ public void KeepWithItemReferenceOnNonmatchingMetadata()
}

[Fact]
public void FailWithMatchingMultipleMetadata()
public void RemoveMatchingMultipleMetadata()
{
string content = ObjectModelHelpers.FormatProjectContentsWithItemGroupFragment(
@"<I1 Include='a1' M1='1' M2='a'/>
Expand All @@ -2418,12 +2418,16 @@ public void FailWithMatchingMultipleMetadata()

<I2 Remove='@(I1)' MatchOnMetadata='M1;M2'/>");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
Project project = ObjectModelHelpers.CreateInMemoryProject(content);
IEnumerable<ProjectItem> items = project.ItemsIgnoringCondition.Where(i => i.ItemType.Equals("I2"));
items.Count().ShouldBe(3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For both of the tests, assert that the right items are left in I2.

items.ElementAt(0).EvaluatedInclude.ShouldBe("a2");
items.ElementAt(1).EvaluatedInclude.ShouldBe("c2");
items.ElementAt(2).EvaluatedInclude.ShouldBe("d2");
}

[Fact]
public void FailWithMultipleItemReferenceOnMatchingMetadata()
public void RemoveMultipleItemReferenceOnMatchingMetadata()
{
string content = ObjectModelHelpers.FormatProjectContentsWithItemGroupFragment(
@"<I1 Include='a1' M1='1' M2='a'/>
Expand All @@ -2443,8 +2447,9 @@ public void FailWithMultipleItemReferenceOnMatchingMetadata()

<I3 Remove='@(I1);@(I2)' MatchOnMetadata='M1' />");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
Project project = ObjectModelHelpers.CreateInMemoryProject(content);
IEnumerable<ProjectItem> items = project.ItemsIgnoringCondition.Where(i => i.ItemType.Equals("I3"));
items.ShouldBeEmpty();
}

[Fact]
Expand All @@ -2462,9 +2467,8 @@ public void FailWithMetadataItemReferenceOnMatchingMetadata()
<I2 Include='d2' M1='y' m2='d'/>

<I2 Remove='%(I1.M1)' MatchOnMetadata='M1' />");

Should.Throw<InvalidProjectFileException>(() => ObjectModelHelpers.CreateInMemoryProject(content))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToReferencedItems");
}

[Fact]
Expand Down
19 changes: 12 additions & 7 deletions src/Build.UnitTests/BackEnd/IntrinsicTask_Tests.cs
Expand Up @@ -1989,7 +1989,7 @@ public void KeepWithItemReferenceOnNonmatchingMetadata()
}

[Fact]
public void FailWithMatchingMultipleMetadata()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't delete the tests, instead replace the exception assertion with an assertion on I2's contents

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new test implies you can match with multiple data now though. If we want to keep that, the FailWithMatchingMultipleMetadata test is actually invalid.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed it to "RemoveWithMatchingMultipleMetdata." I did enable matching multiple.

public void RemoveWithMatchingMultipleMetadata()
{
string content = ObjectModelHelpers.CleanupFileContents(
@"<Project ToolsVersion='msbuilddefaulttoolsversion' xmlns='msbuildnamespace'>
Expand All @@ -2010,12 +2010,16 @@ public void FailWithMatchingMultipleMetadata()
</Target></Project>");
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
ExecuteTask(task, lookup);
ICollection<ProjectItemInstance> items = lookup.GetItems("I2");
items.Count().ShouldBe(3);
items.ElementAt(0).EvaluatedInclude.ShouldBe("a2");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So matching the names of metadata is case insensitive, but their values are not. The values being case sensitive makes sense. I remember us having a discussion on the topic in a linux issue.

Just thinking out loud.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MatchOnMetadata allows three options: case sensitive, case insensitive, and pathlike. The third is most similar to what you were thinking, I think, with defaulting to case insensitive regardless of OS.

items.ElementAt(1).EvaluatedInclude.ShouldBe("c2");
items.ElementAt(2).EvaluatedInclude.ShouldBe("d2");
}

[Fact]
public void FailWithMultipleItemReferenceOnMatchingMetadata()
public void RemoveWithMultipleItemReferenceOnMatchingMetadata()
{
string content = ObjectModelHelpers.CleanupFileContents(
@"<Project ToolsVersion='msbuilddefaulttoolsversion' xmlns='msbuildnamespace'>
Expand All @@ -2041,8 +2045,9 @@ public void FailWithMultipleItemReferenceOnMatchingMetadata()
</Target></Project>");
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
ExecuteTask(task, lookup);
ICollection<ProjectItemInstance> items = lookup.GetItems("I3");
items.ShouldBeEmpty();
}

[Fact]
Expand All @@ -2068,7 +2073,7 @@ public void FailWithMetadataItemReferenceOnMatchingMetadata()
IntrinsicTask task = CreateIntrinsicTask(content);
Lookup lookup = LookupHelpers.CreateEmptyLookup();
Assert.ThrowsAny<InvalidProjectFileException>(() => ExecuteTask(task, lookup))
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");
.HelpKeyword.ShouldBe("MSBuild.OM_MatchOnMetadataIsRestrictedToReferencedItems");
}

[Fact]
Expand Down
Expand Up @@ -249,7 +249,7 @@ private void ExecuteRemove(ProjectItemGroupTaskItemInstance child, ItemBucket bu
}
else
{
itemsToRemove = FindItemsUsingMatchOnMetadata(group, child, bucket, matchOnMetadata, matchingOptions);
itemsToRemove = FindItemsMatchingMetadataSpecification(group, child, bucket.Expander, matchOnMetadata, matchingOptions);
}

if (itemsToRemove != null)
Expand All @@ -268,29 +268,6 @@ private void ExecuteRemove(ProjectItemGroupTaskItemInstance child, ItemBucket bu
}
}

private List<ProjectItemInstance> FindItemsUsingMatchOnMetadata(
ICollection<ProjectItemInstance> items,
ProjectItemGroupTaskItemInstance child,
ItemBucket bucket,
HashSet<string> matchOnMetadata,
MatchOnMetadataOptions options)
{
ErrorUtilities.VerifyThrowArgumentNull(matchOnMetadata, nameof(matchOnMetadata));

var itemSpec = new ItemSpec<ProjectPropertyInstance, ProjectItemInstance>(child.Remove, bucket.Expander, child.RemoveLocation, Project.Directory, true);

ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
itemSpec.Fragments.Count == 1
&& itemSpec.Fragments.First() is ItemSpec<ProjectPropertyInstance, ProjectItemInstance>.ItemExpressionFragment
&& matchOnMetadata.Count == 1,
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem",
child.RemoveLocation,
child.Remove);

return items.Where(item => itemSpec.MatchesItemOnMetadata(item, matchOnMetadata, options)).ToList();
}

/// <summary>
/// Modifies items in the world - specifically, changes their metadata. Changes to items that are part of the project manifest are backed up, so
/// they can be reverted when the project is reset after the end of the build.
Expand Down Expand Up @@ -597,6 +574,24 @@ private List<ProjectItemInstance> FindItemsMatchingSpecification
return itemsRemoved;
}

private List<ProjectItemInstance> FindItemsMatchingMetadataSpecification(
ICollection<ProjectItemInstance> group,
ProjectItemGroupTaskItemInstance child,
Expander<ProjectPropertyInstance, ProjectItemInstance> expander,
HashSet<string> matchOnMetadata,
MatchOnMetadataOptions matchingOptions)
{
ItemSpec<ProjectPropertyInstance, ProjectItemInstance> itemSpec = new(child.Remove, expander, child.RemoveLocation, Project.Directory, true);
ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
itemSpec.Fragments.All(f => f is ItemSpec<ProjectPropertyInstance, ProjectItemInstance>.ItemExpressionFragment),
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToReferencedItems",
child.RemoveLocation,
child.Remove);
MetadataSet<ProjectPropertyInstance, ProjectItemInstance> metadataSet = new(matchingOptions, matchOnMetadata, itemSpec);
return group.Where(item => metadataSet.Contains(matchOnMetadata.Select(m => item.GetMetadataValue(m)))).ToList();
}

/// <summary>
/// This class is used during ItemGroup intrinsic tasks to resolve metadata references. It consists of three tables:
/// 1. The metadata added during evaluation.
Expand Down
116 changes: 70 additions & 46 deletions src/Build/Evaluation/ItemSpec.cs
Expand Up @@ -85,24 +85,6 @@ public override bool IsMatch(string itemToMatch)
return ReferencedItems.Any(v => v.ItemAsValueFragment.IsMatch(itemToMatch));
}

public override bool IsMatchOnMetadata(IItem item, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
return ReferencedItems.Any(referencedItem =>
metadata.All(m => !item.GetMetadataValue(m).Equals(string.Empty) && MetadataComparer(options, item.GetMetadataValue(m), referencedItem.Item.GetMetadataValue(m))));
}

private bool MetadataComparer(MatchOnMetadataOptions options, string itemMetadata, string referencedItemMetadata)
{
if (options.Equals(MatchOnMetadataOptions.PathLike))
{
return FileUtilities.ComparePathsNoThrow(itemMetadata, referencedItemMetadata, ProjectDirectory);
}
else
{
return String.Equals(itemMetadata, referencedItemMetadata, options.Equals(MatchOnMetadataOptions.CaseInsensitive) ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
}
}

public override IMSBuildGlob ToMSBuildGlob()
{
return MsBuildGlob;
Expand Down Expand Up @@ -310,26 +292,6 @@ public bool MatchesItem(I item)
return false;
}

/// <summary>
/// Return true if any of the given <paramref name="metadata" /> matches the metadata on <paramref name="item" />
/// </summary>
/// <param name="item">The item to attempt to find a match for based on matching metadata</param>
/// <param name="metadata">Names of metadata to look for matches for</param>
/// <param name="options">metadata option matching</param>
/// <returns></returns>
public bool MatchesItemOnMetadata(IItem item, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
foreach (var fragment in Fragments)
{
if (fragment.IsMatchOnMetadata(item, metadata, options))
{
return true;
}
}

return false;
}

/// <summary>
/// Return the fragments that match against the given <paramref name="itemToMatch" />
/// </summary>
Expand Down Expand Up @@ -456,14 +418,6 @@ public virtual bool IsMatch(string itemToMatch)
return FileMatcher.IsMatch(itemToMatch);
}

/// <summary>
/// Returns true if <paramref name="itemToMatch" /> matches any ReferencedItems based on <paramref name="metadata" /> and <paramref name="options" />.
/// </summary>
public virtual bool IsMatchOnMetadata(IItem itemToMatch, IEnumerable<string> metadata, MatchOnMetadataOptions options)
{
return false;
}

public virtual IMSBuildGlob ToMSBuildGlob()
{
return MsBuildGlob;
Expand Down Expand Up @@ -504,4 +458,74 @@ public GlobFragment(string textFragment, string projectDirectory)
&& TextFragment[2] == '*'
&& FileUtilities.IsAnySlash(TextFragment[3]);
}

internal sealed class MetadataSet<P, I> where P : class, IProperty where I : class, IItem, IMetadataTable
{
private readonly Dictionary<string, MetadataSet<P, I>> _children;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use this convoluted data structure instead of dumping all metadata values from the Itemspec into a flat set of normalized metadata values?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still not 100% sure I agree with the data structure, but I want to clarify my understanding from the PR review today.

A flat set of normalized metadata values would prevent us from being able to determine exactly which metadata values from which items were matched.

We'd need to do this to enable the logic in a test like RemoveWithMultipleItemReferenceOnMatchingMetadata.

<I3 Remove='@(I1);@(I2)' MatchOnMetadata='M1' />

This remove attribute effectively translates to:
Remove items in I3 where:

  • I1 OR I2 match on the following metadata:
    • M1

I believe this also enables something like:

<I3 Remove='@(I1);@(I2)' MatchOnMetadata='M1;M2' />

Where this remove attribute translates to:
Remove items in I3 where:

  • I1 OR I2 match on the following metadata:
    • M1 AND M2

@Forgind is this understanding correct? If so I think there should be a test for multiple items and multiple metadata.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrote detailed comment for MetadataTrie class. Will push soon.

private readonly Func<string, string> _normalize;

internal MetadataSet(MatchOnMetadataOptions options, IEnumerable<string> metadata, ItemSpec<P, I> itemSpec)
{
StringComparer comparer = options == MatchOnMetadataOptions.CaseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase;
_children = new Dictionary<string, MetadataSet<P, I>>(comparer);
_normalize = options == MatchOnMetadataOptions.PathLike ? p => FileUtilities.NormalizePathForComparisonNoThrow(p, Environment.CurrentDirectory) : p => p;
foreach (ItemSpec<P, I>.ItemExpressionFragment frag in itemSpec.Fragments)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this type check, isn't ItemSpec.Fragments a list of the base class ItemSpecFragment? (List<ItemSpecFragment>)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. It might be casting it automatically when it sees that, but I'm not sure. I don't think we should care about other kinds of fragments, since I don't think they can have metadata, so I think this is ok, but that was a lot of "I think"s.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as whether this is actually a safe, I should have mentioned that the type is checked before the MetadataSet is constructed, so it should always be correct.

{
foreach (ItemSpec<P, I>.ReferencedItem referencedItem in frag.ReferencedItems)
{
this.Add(metadata.Select(m => referencedItem.Item.GetMetadataValue(m)), comparer);
}
}
}

private MetadataSet(StringComparer comparer)
{
_children = new Dictionary<string, MetadataSet<P, I>>(comparer);
}

// Relies on IEnumerable returning the metadata in a reasonable order. Reasonable?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The question is whether the matching IEnumerable<> is passed to the constructor and to Contains. It appears to be true although I wonder if there's a way to enforce it in the code.

private void Add(IEnumerable<string> metadata, StringComparer comparer)
{
MetadataSet<P, I> current = this;
foreach (string m in metadata)
{
string normalizedString = _normalize(m);
if (!current._children.TryGetValue(normalizedString, out MetadataSet<P, I> child))
{
child = new MetadataSet<P, I>(comparer);
current._children.Add(normalizedString, child);
}
current = child;
}
}

internal bool Contains(IEnumerable<string> metadata)
{
MetadataSet<P, I> current = this;
foreach (string m in metadata)
{
if (String.IsNullOrEmpty(m))
{
return false;
}
if (!current._children.TryGetValue(_normalize(m), out current))
{
return false;
}
}
return true;
}
}

public enum MatchOnMetadataOptions
{
CaseSensitive,
CaseInsensitive,
PathLike
}

public static class MatchOnMetadataConstants
{
public const MatchOnMetadataOptions MatchOnMetadataOptionsDefaultValue = MatchOnMetadataOptions.CaseSensitive;
}
}
38 changes: 17 additions & 21 deletions src/Build/Evaluation/LazyItemEvaluator.RemoveOperation.cs
Expand Up @@ -13,13 +13,22 @@ internal partial class LazyItemEvaluator<P, I, M, D>
class RemoveOperation : LazyItemOperation
{
readonly ImmutableList<string> _matchOnMetadata;
readonly MatchOnMetadataOptions _matchOnMetadataOptions;
private MetadataSet<P, I> _metadataSet;

public RemoveOperation(RemoveOperationBuilder builder, LazyItemEvaluator<P, I, M, D> lazyEvaluator)
: base(builder, lazyEvaluator)
{
_matchOnMetadata = builder.MatchOnMetadata.ToImmutable();
_matchOnMetadataOptions = builder.MatchOnMetadataOptions;

ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
_matchOnMetadata.IsEmpty || _itemSpec.Fragments.All(f => f is ItemSpec<ProjectProperty, ProjectItem>.ItemExpressionFragment),
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToReferencedItems");

if (!_matchOnMetadata.IsEmpty)
{
_metadataSet = new MetadataSet<P, I>(builder.MatchOnMetadataOptions, _matchOnMetadata, _itemSpec);
}
}

/// <summary>
Expand All @@ -31,13 +40,6 @@ public RemoveOperation(RemoveOperationBuilder builder, LazyItemEvaluator<P, I, M
/// </remarks>
protected override void ApplyImpl(ImmutableList<ItemData>.Builder listBuilder, ImmutableHashSet<string> globsToIgnore)
{
var matchOnMetadataValid = !_matchOnMetadata.IsEmpty && _itemSpec.Fragments.Count == 1
&& _itemSpec.Fragments.First() is ItemSpec<ProjectProperty, ProjectItem>.ItemExpressionFragment;
ProjectFileErrorUtilities.VerifyThrowInvalidProjectFile(
_matchOnMetadata.IsEmpty || (matchOnMetadataValid && _matchOnMetadata.Count == 1),
new BuildEventFileInfo(string.Empty),
"OM_MatchOnMetadataIsRestrictedToOnlyOneReferencedItem");

if (_matchOnMetadata.IsEmpty && ItemspecContainsASingleBareItemReference(_itemSpec, _itemElement.ItemType) && _conditionResult)
{
// Perf optimization: If the Remove operation references itself (e.g. <I Remove="@(I)"/>)
Expand All @@ -55,13 +57,18 @@ protected override ImmutableList<I> SelectItems(ImmutableList<ItemData>.Builder
var items = ImmutableHashSet.CreateBuilder<I>();
foreach (ItemData item in listBuilder)
{
if (_matchOnMetadata.IsEmpty ? _itemSpec.MatchesItem(item.Item) : _itemSpec.MatchesItemOnMetadata(item.Item, _matchOnMetadata, _matchOnMetadataOptions))
if (_matchOnMetadata.IsEmpty ? _itemSpec.MatchesItem(item.Item) : MatchesItemOnMetadata(item.Item))
items.Add(item.Item);
}

return items.ToImmutableList();
}

private bool MatchesItemOnMetadata(I item)
{
return _metadataSet.Contains(_matchOnMetadata.Select(m => item.GetMetadataValue(m)));
}

protected override void SaveItems(ImmutableList<I> items, ImmutableList<ItemData>.Builder listBuilder)
{
if (!_conditionResult)
Expand Down Expand Up @@ -100,15 +107,4 @@ public RemoveOperationBuilder(ProjectItemElement itemElement, bool conditionResu
}
}
}

public enum MatchOnMetadataOptions
{
CaseSensitive,
CaseInsensitive,
PathLike
}

public static class MatchOnMetadataConstants {
public const MatchOnMetadataOptions MatchOnMetadataOptionsDefaultValue = MatchOnMetadataOptions.CaseSensitive;
}
}