forked from dotnet/msbuild
-
Notifications
You must be signed in to change notification settings - Fork 0
/
OpportunisticIntern.cs
1093 lines (960 loc) · 46.7 KB
/
OpportunisticIntern.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
#if !CLR2COMPATIBILITY
using System.Collections.Concurrent;
#endif
using System.Text;
using System.Linq;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using Microsoft.Build.Shared;
using Microsoft.Build.Utilities;
namespace Microsoft.Build
{
/// <summary>
/// This class is used to selectively intern strings. It should be used at the point of new string creation.
/// For example,
///
/// string interned = OpportunisticIntern.Intern(String.Join(",",someStrings));
///
/// There are currently two underlying implementations. The new default one in WeakStringCacheInterner is based on weak GC handles.
/// The legacy one in BucketedPrioritizedStringList is available only as an escape hatch by setting an environment variable.
///
/// The legacy implementation uses heuristics to decide whether it will be efficient to intern a string or not. There is no
/// guarantee that a string will intern.
///
/// The thresholds and sizes were determined by experimentation to give the best number of bytes saved
/// at reasonable elapsed time cost.
///
/// The new implementation interns all strings but maintains only weak references so it doesn't keep the strings alive.
/// </summary>
internal sealed class OpportunisticIntern
{
/// <summary>
/// Defines the interner interface as we currently implement more than one.
/// </summary>
private interface IInternerImplementation
{
/// <summary>
/// Converts the given internable candidate to its string representation. Efficient implementions have side-effects
/// of caching the results to end up with as few duplicates on the managed heap as practical.
/// </summary>
string InterningToString<T>(T candidate) where T : IInternable;
/// <summary>
/// Prints implementation specific interning statistics to the console.
/// </summary>
/// <param name="heading">A string identifying the interner in the output.</param>
void ReportStatistics(string heading);
}
/// <summary>
/// The singleton instance of OpportunisticIntern.
/// </summary>
private static OpportunisticIntern _instance = new OpportunisticIntern();
internal static OpportunisticIntern Instance => _instance;
private readonly bool _useLegacyInterner = Traits.Instance.UseLegacyStringInterner;
private readonly bool _useSimpleConcurrency = Traits.Instance.UseSimpleInternConcurrency;
/// <summary>
/// The size of the small mru list.
/// </summary>
private readonly int _smallMruSize;
/// <summary>
/// The size of the large mru list.
/// </summary>
private readonly int _largeMruSize;
/// <summary>
/// The size of the huge mru list.
/// </summary>
private readonly int _hugeMruSize;
/// <summary>
/// The smallest size a string can be to be considered small.
/// </summary>
private readonly int _smallMruThreshold;
/// <summary>
/// The smallest size a string can be to be considered large.
/// </summary>
private readonly int _largeMruThreshold;
/// <summary>
/// The smallest size a string can be to be considered huge.
/// </summary>
private readonly int _hugeMruThreshold;
/// <summary>
/// The smallest size a string can be to be ginormous.
/// 8K for large object heap.
/// </summary>
private readonly int _ginormousThreshold;
/// <summary>
/// The interner implementation in use.
/// </summary>
private IInternerImplementation _interner;
#region Statistics
/// <summary>
/// What if Mru lists were infinitely long?
/// </summary>
private BucketedPrioritizedStringList _whatIfInfinite;
/// <summary>
/// What if we doubled the size of the Mru lists?
/// </summary>
private BucketedPrioritizedStringList _whatIfDoubled;
/// <summary>
/// What if we halved the size of the Mru lists?
/// </summary>
private BucketedPrioritizedStringList _whatIfHalved;
/// <summary>
/// What if the size of Mru lists was zero? (We still intern tiny strings in this case)
/// </summary>
private BucketedPrioritizedStringList _whatIfZero;
#endregion
private OpportunisticIntern()
{
_smallMruSize = AssignViaEnvironment("MSBUILDSMALLINTERNSIZE", 50);
_largeMruSize = AssignViaEnvironment("MSBUILDLARGEINTERNSIZE", 100);
_hugeMruSize = AssignViaEnvironment("MSBUILDHUGEINTERNSIZE", 100);
_smallMruThreshold = AssignViaEnvironment("MSBUILDSMALLINTERNTHRESHOLD", 50);
_largeMruThreshold = AssignViaEnvironment("MSBUILDLARGEINTERNTHRESHOLD", 70);
_hugeMruThreshold = AssignViaEnvironment("MSBUILDHUGEINTERNTHRESHOLD", 200);
_ginormousThreshold = AssignViaEnvironment("MSBUILDGINORMOUSINTERNTHRESHOLD", 8000);
_interner = _useLegacyInterner
? (IInternerImplementation)new BucketedPrioritizedStringList(gatherStatistics: false, _smallMruSize, _largeMruSize, _hugeMruSize,
_smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency)
: (IInternerImplementation)new WeakStringCacheInterner(gatherStatistics: false);
}
/// <summary>
/// Recreates the singleton instance based on the current environment (test only).
/// </summary>
internal static void ResetForTests()
{
Debug.Assert(BuildEnvironmentHelper.Instance.RunningTests);
_instance = new OpportunisticIntern();
}
/// <summary>
/// Assign an int from an environment variable. If its not present, use the default.
/// </summary>
private int AssignViaEnvironment(string env, int @default)
{
string threshold = Environment.GetEnvironmentVariable(env);
if (!string.IsNullOrEmpty(threshold))
{
if (int.TryParse(threshold, out int result))
{
return result;
}
}
return @default;
}
/// <summary>
/// Turn on statistics gathering.
/// </summary>
internal void EnableStatisticsGathering()
{
if (_useLegacyInterner)
{
// Statistics include several 'what if' scenarios such as doubling the size of the MRU lists.
_interner = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize, _largeMruSize, _hugeMruSize, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency);
_whatIfInfinite = new BucketedPrioritizedStringList(gatherStatistics: true, int.MaxValue, int.MaxValue, int.MaxValue, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency);
_whatIfDoubled = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize * 2, _largeMruSize * 2, _hugeMruSize * 2, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency);
_whatIfHalved = new BucketedPrioritizedStringList(gatherStatistics: true, _smallMruSize / 2, _largeMruSize / 2, _hugeMruSize / 2, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency);
_whatIfZero = new BucketedPrioritizedStringList(gatherStatistics: true, 0, 0, 0, _smallMruThreshold, _largeMruThreshold, _hugeMruThreshold, _ginormousThreshold, _useSimpleConcurrency);
}
else
{
_interner = new WeakStringCacheInterner(gatherStatistics: true);
}
}
/// <summary>
/// Intern the given internable.
/// </summary>
internal static string InternableToString<T>(T candidate) where T : IInternable
{
return Instance.InternableToStringImpl(candidate);
}
/// <summary>
/// Potentially Intern the given string builder.
/// </summary>
internal static string StringBuilderToString(StringBuilder candidate)
{
return Instance.InternableToStringImpl(new StringBuilderInternTarget(candidate));
}
/// <summary>
/// Potentially Intern the given char array.
/// </summary>
internal static string CharArrayToString(char[] candidate, int count)
{
return Instance.InternableToStringImpl(new CharArrayInternTarget(candidate, count));
}
/// <summary>
/// Potentially Intern the given char array.
/// </summary>
internal static string CharArrayToString(char[] candidate, int startIndex, int count)
{
return Instance.InternableToStringImpl(new CharArrayInternTarget(candidate, startIndex, count));
}
/// <summary>
/// Potentially Intern the given string.
/// </summary>
/// <param name="candidate">The string to intern.</param>
/// <returns>The interned string, or the same string if it could not be interned.</returns>
internal static string InternStringIfPossible(string candidate)
{
return Instance.InternableToStringImpl(new StringInternTarget(candidate));
}
/// <summary>
/// Intern the given internable.
/// </summary>
private string InternableToStringImpl<T>(T candidate) where T : IInternable
{
if (candidate.Length == 0)
{
// As in the case that a property or itemlist has evaluated to empty.
return string.Empty;
}
if (_whatIfInfinite != null)
{
_whatIfInfinite.InterningToString(candidate);
_whatIfDoubled.InterningToString(candidate);
_whatIfHalved.InterningToString(candidate);
_whatIfZero.InterningToString(candidate);
}
string result = _interner.InterningToString(candidate);
#if DEBUG
string expected = candidate.ExpensiveConvertToString();
if (!String.Equals(result, expected))
{
ErrorUtilities.ThrowInternalError("Interned string {0} should have been {1}", result, expected);
}
#endif
return result;
}
/// <summary>
/// Report statistics about interning. Don't call unless GatherStatistics has been called beforehand.
/// </summary>
internal void ReportStatistics()
{
_interner.ReportStatistics("Main");
if (_useLegacyInterner)
{
_whatIfInfinite.ReportStatistics("if Infinite");
_whatIfDoubled.ReportStatistics("if Doubled");
_whatIfHalved.ReportStatistics("if Halved");
_whatIfZero.ReportStatistics("if Zero");
Console.WriteLine(" * Even for MRU size of zero there will still be some intern hits because of the tiny ");
Console.WriteLine(" string matching (eg. 'true')");
}
}
private static bool TryInternHardcodedString<T>(T candidate, string str, ref string interned) where T : IInternable
{
Debug.Assert(candidate.Length == str.Length);
if (candidate.StartsWithStringByOrdinalComparison(str))
{
interned = str;
return true;
}
return false;
}
/// <summary>
/// Try to match the candidate with small number of hardcoded interned string literals.
/// The return value indicates how the string was interned (if at all).
/// </summary>
/// <returns>
/// True if the candidate matched a hardcoded literal, null if it matched a "do not intern" string, false otherwise.
/// </returns>
private static bool? TryMatchHardcodedStrings<T>(T candidate, out string interned) where T : IInternable
{
int length = candidate.Length;
interned = null;
// Each of the hard-coded small strings below showed up in a profile run with considerable duplication in memory.
if (length == 2)
{
if (candidate[1] == '#')
{
if (candidate[0] == 'C')
{
interned = "C#";
return true;
}
if (candidate[0] == 'F')
{
interned = "F#";
return true;
}
}
if (candidate[0] == 'V' && candidate[1] == 'B')
{
interned = "VB";
return true;
}
}
else if (length == 4)
{
if (TryInternHardcodedString(candidate, "TRUE", ref interned) ||
TryInternHardcodedString(candidate, "True", ref interned) ||
TryInternHardcodedString(candidate, "Copy", ref interned) ||
TryInternHardcodedString(candidate, "true", ref interned) ||
TryInternHardcodedString(candidate, "v4.0", ref interned))
{
return true;
}
}
else if (length == 5)
{
if (TryInternHardcodedString(candidate, "FALSE", ref interned) ||
TryInternHardcodedString(candidate, "false", ref interned) ||
TryInternHardcodedString(candidate, "Debug", ref interned) ||
TryInternHardcodedString(candidate, "Build", ref interned) ||
TryInternHardcodedString(candidate, "Win32", ref interned))
{
return true;
}
}
else if (length == 6)
{
if (TryInternHardcodedString(candidate, "''!=''", ref interned) ||
TryInternHardcodedString(candidate, "AnyCPU", ref interned))
{
return true;
}
}
else if (length == 7)
{
if (TryInternHardcodedString(candidate, "Library", ref interned) ||
TryInternHardcodedString(candidate, "MSBuild", ref interned) ||
TryInternHardcodedString(candidate, "Release", ref interned))
{
return true;
}
}
// see Microsoft.Build.BackEnd.BuildRequestConfiguration.CreateUniqueGlobalProperty
else if (length > MSBuildConstants.MSBuildDummyGlobalPropertyHeader.Length &&
candidate.StartsWithStringByOrdinalComparison(MSBuildConstants.MSBuildDummyGlobalPropertyHeader))
{
// don't want to leak unique strings into the cache
interned = candidate.ExpensiveConvertToString();
return null;
}
else if (length == 24)
{
if (TryInternHardcodedString(candidate, "ResolveAssemblyReference", ref interned))
{
return true;
}
}
return false;
}
/// <summary>
/// Implements interning based on a WeakStringCache (new implementation).
/// </summary>
private class WeakStringCacheInterner : IInternerImplementation
{
/// <summary>
/// Enumerates the possible interning results.
/// </summary>
private enum InternResult
{
MatchedHardcodedString,
FoundInWeakStringCache,
AddedToWeakStringCache,
RejectedFromInterning
}
/// <summary>
/// The cache to keep strings in.
/// </summary>
private readonly WeakStringCache _weakStringCache = new WeakStringCache();
#region Statistics
/// <summary>
/// Whether or not to gather statistics.
/// </summary>
private readonly bool _gatherStatistics;
/// <summary>
/// Number of times interning with hardcoded string literals worked.
/// </summary>
private int _hardcodedInternHits;
/// <summary>
/// Number of times the regular interning path found the string in the cache.
/// </summary>
private int _regularInternHits;
/// <summary>
/// Number of times the regular interning path added the string to the cache.
/// </summary>
private int _regularInternMisses;
/// <summary>
/// Number of times interning wasn't attempted.
/// </summary>
private int _rejectedStrings;
/// <summary>
/// Total number of strings eliminated by interning.
/// </summary>
private int _internEliminatedStrings;
/// <summary>
/// Total number of chars eliminated across all strings.
/// </summary>
private int _internEliminatedChars;
/// <summary>
/// Maps strings that went though the regular (i.e. not hardcoded) interning path to the number of times they have been
/// seen. The higher the number the better the payoff if the string had been hardcoded.
/// </summary>
private Dictionary<string, int> _missedHardcodedStrings;
#endregion
public WeakStringCacheInterner(bool gatherStatistics)
{
if (gatherStatistics)
{
_missedHardcodedStrings = new Dictionary<string, int>();
}
_gatherStatistics = gatherStatistics;
}
/// <summary>
/// Intern the given internable.
/// </summary>
public string InterningToString<T>(T candidate) where T : IInternable
{
if (_gatherStatistics)
{
return InternWithStatistics(candidate);
}
else
{
TryIntern(candidate, out string result);
return result;
}
}
/// <summary>
/// Report statistics to the console.
/// </summary>
public void ReportStatistics(string heading)
{
string title = "Opportunistic Intern (" + heading + ")";
Console.WriteLine("\n{0}{1}{0}", new string('=', 41 - (title.Length / 2)), title);
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Hardcoded Hits", _hardcodedInternHits, "hits");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Hardcoded Rejects", _rejectedStrings, "rejects");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Hits", _regularInternHits, "hits");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "WeakStringCache Misses", _regularInternMisses, "misses");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Strings*", _internEliminatedStrings, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Chars", _internEliminatedChars, "chars");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Estimated Eliminated Bytes", _internEliminatedChars * 2, "bytes");
Console.WriteLine("Elimination assumes that strings provided were unique objects.");
Console.WriteLine("|---------------------------------------------------------------------------------|");
IEnumerable<string> topMissingHardcodedString =
_missedHardcodedStrings
.OrderByDescending(kv => kv.Value * kv.Key.Length)
.Take(15)
.Where(kv => kv.Value > 1)
.Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars)\n{0}", kv.Key, kv.Value, kv.Key.Length));
Console.WriteLine("##########Top Missing Hardcoded Strings: \n{0} ", string.Join("\n==============\n", topMissingHardcodedString.ToArray()));
Console.WriteLine();
WeakStringCache.DebugInfo debugInfo = _weakStringCache.GetDebugInfo();
Console.WriteLine("WeakStringCache statistics:");
Console.WriteLine("String count live/collected/total = {0}/{1}/{2}", debugInfo.LiveStringCount, debugInfo.CollectedStringCount, debugInfo.LiveStringCount + debugInfo.CollectedStringCount);
}
/// <summary>
/// Try to intern the string.
/// The return value indicates the how the string was interned (if at all).
/// </summary>
private InternResult TryIntern<T>(T candidate, out string interned) where T : IInternable
{
// First, try the hard coded intern strings.
bool? hardcodedMatchResult = TryMatchHardcodedStrings(candidate, out interned);
if (hardcodedMatchResult != false)
{
// Either matched a hardcoded string or is explicitly not to be interned.
return hardcodedMatchResult.HasValue ? InternResult.MatchedHardcodedString : InternResult.RejectedFromInterning;
}
interned = _weakStringCache.GetOrCreateEntry(candidate, out bool cacheHit);
return cacheHit ? InternResult.FoundInWeakStringCache : InternResult.AddedToWeakStringCache;
}
/// <summary>
/// Version of Intern that gathers statistics
/// </summary>
private string InternWithStatistics<T>(T candidate) where T : IInternable
{
lock (_missedHardcodedStrings)
{
InternResult internResult = TryIntern(candidate, out string result);
switch (internResult)
{
case InternResult.MatchedHardcodedString:
_hardcodedInternHits++;
break;
case InternResult.FoundInWeakStringCache:
_regularInternHits++;
break;
case InternResult.AddedToWeakStringCache:
_regularInternMisses++;
break;
case InternResult.RejectedFromInterning:
_rejectedStrings++;
break;
}
if (internResult != InternResult.MatchedHardcodedString && internResult != InternResult.RejectedFromInterning)
{
_missedHardcodedStrings.TryGetValue(result, out int priorCount);
_missedHardcodedStrings[result] = priorCount + 1;
}
if (!candidate.ReferenceEquals(result))
{
// Reference changed so 'candidate' is now released and should save memory.
_internEliminatedStrings++;
_internEliminatedChars += candidate.Length;
}
return result;
}
}
}
/// <summary>
/// Manages a set of mru lists that hold strings in varying size ranges (legacy implementation).
/// </summary>
private class BucketedPrioritizedStringList : IInternerImplementation
{
/// <summary>
/// The small string Mru list.
/// </summary>
private readonly PrioritizedStringList _smallMru;
/// <summary>
/// The large string Mru list.
/// </summary>
private readonly PrioritizedStringList _largeMru;
/// <summary>
/// The huge string Mru list.
/// </summary>
private readonly PrioritizedStringList _hugeMru;
/// <summary>
/// Three most recently used strings over 8K.
/// </summary>
private readonly LinkedList<WeakReference> _ginormous = new LinkedList<WeakReference>();
/// <summary>
/// The smallest size a string can be to be considered small.
/// </summary>
private readonly int _smallMruThreshold;
/// <summary>
/// The smallest size a string can be to be considered large.
/// </summary>
private readonly int _largeMruThreshold;
/// <summary>
/// The smallest size a string can be to be considered huge.
/// </summary>
private readonly int _hugeMruThreshold;
/// <summary>
/// The smallest size a string can be to be ginormous.
/// </summary>
private readonly int _ginormousThreshold;
private readonly bool _useSimpleConcurrency;
#if !CLR2COMPATIBILITY
// ConcurrentDictionary starts with capacity 31 but we're usually adding far more than that. Make a better first capacity guess to reduce
// ConcurrentDictionary having to take all internal locks to upgrade its bucket list. Note that the number should be prime per the
// comments on the code at https://referencesource.microsoft.com/#mscorlib/system/Collections/Concurrent/ConcurrentDictionary.cs,122
// Also note default lock count is NativeMethodsShared.GetLogicalCoreCount() from the same code.
private const int InitialCapacity = 2053;
private readonly ConcurrentDictionary<string, string> _internedStrings = new ConcurrentDictionary<string, string>(NativeMethodsShared.GetLogicalCoreCount(), InitialCapacity, StringComparer.Ordinal);
#endif
#region Statistics
/// <summary>
/// Whether or not to gather statistics
/// </summary>
private readonly bool _gatherStatistics;
/// <summary>
/// Number of times interning worked.
/// </summary>
private int _internHits;
/// <summary>
/// Number of times interning didn't work.
/// </summary>
private int _internMisses;
/// <summary>
/// Number of times interning wasn't attempted.
/// </summary>
private int _internRejects;
/// <summary>
/// Total number of strings eliminated by interning.
/// </summary>
private int _internEliminatedStrings;
/// <summary>
/// Total number of chars eliminated across all strings.
/// </summary>
private int _internEliminatedChars;
/// <summary>
/// Number of times the ginourmous string hit.
/// </summary>
private int _ginormousHits;
/// <summary>
/// Number of times the ginourmous string missed.
/// </summary>
private int _ginormousMisses;
/// <summary>
/// Chars interned for ginormous range.
/// </summary>
private int _ginormousCharsSaved;
/// <summary>
/// Whether or not to track ginormous strings.
/// </summary>
private readonly bool _dontTrack;
/// <summary>
/// The time spent interning.
/// </summary>
private readonly Stopwatch _stopwatch;
/// <summary>
/// Strings which did not intern
/// </summary>
private readonly Dictionary<string, int> _missedStrings;
/// <summary>
/// Strings which we didn't attempt to intern
/// </summary>
private readonly Dictionary<string, int> _rejectedStrings;
/// <summary>
/// Number of ginormous strings to keep
/// By observation of Auto7, there are about three variations of the huge solution config blob
/// There aren't really any other strings of this size, but make it 10 to be sure. (There will barely be any misses)
/// </summary>
private const int GinormousSize = 10;
#endregion
/// <summary>
/// Construct.
/// </summary>
internal BucketedPrioritizedStringList(bool gatherStatistics, int smallMruSize, int largeMruSize, int hugeMruSize, int smallMruThreshold, int largeMruThreshold, int hugeMruThreshold, int ginormousThreshold, bool useSimpleConcurrency)
{
if (smallMruSize == 0 && largeMruSize == 0 && hugeMruSize == 0)
{
_dontTrack = true;
}
_smallMru = new PrioritizedStringList(smallMruSize);
_largeMru = new PrioritizedStringList(largeMruSize);
_hugeMru = new PrioritizedStringList(hugeMruSize);
_smallMruThreshold = smallMruThreshold;
_largeMruThreshold = largeMruThreshold;
_hugeMruThreshold = hugeMruThreshold;
_ginormousThreshold = ginormousThreshold;
_useSimpleConcurrency = useSimpleConcurrency;
for (int i = 0; i < GinormousSize; i++)
{
_ginormous.AddFirst(new WeakReference(string.Empty));
}
_gatherStatistics = gatherStatistics;
if (gatherStatistics)
{
_stopwatch = new Stopwatch();
_missedStrings = new Dictionary<string, int>(StringComparer.Ordinal);
_rejectedStrings = new Dictionary<string, int>(StringComparer.Ordinal);
}
}
/// <summary>
/// Intern the given internable.
/// </summary>
public string InterningToString<T>(T candidate) where T : IInternable
{
if (_gatherStatistics)
{
return InternWithStatistics(candidate);
}
else
{
TryIntern(candidate, out string result);
return result;
}
}
/// <summary>
/// Report statistics to the console.
/// </summary>
public void ReportStatistics(string heading)
{
string title = "Opportunistic Intern (" + heading + ")";
Console.WriteLine("\n{0}{1}{0}", new string('=', 41 - (title.Length / 2)), title);
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Hits", _internHits, "hits");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Misses", _internMisses, "misses");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Intern Rejects (as shorter than " + _smallMruThreshold + " bytes)", _internRejects, "rejects");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Strings*", _internEliminatedStrings, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Eliminated Chars", _internEliminatedChars, "chars");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Estimated Eliminated Bytes", _internEliminatedChars * 2, "bytes");
Console.WriteLine("Elimination assumes that strings provided were unique objects.");
Console.WriteLine("|---------------------------------------------------------------------------------|");
KeyValuePair<int, int> held = _smallMru.Statistics();
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Strings MRU Size", Instance._smallMruSize, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Strings (>=" + _smallMruThreshold + " chars) Held", held.Key, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Small Estimated Bytes Held", held.Value * 2, "bytes");
Console.WriteLine("|---------------------------------------------------------------------------------|");
held = _largeMru.Statistics();
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Strings MRU Size", Instance._largeMruSize, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Strings (>=" + _largeMruThreshold + " chars) Held", held.Key, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Large Estimated Bytes Held", held.Value * 2, "bytes");
Console.WriteLine("|---------------------------------------------------------------------------------|");
held = _hugeMru.Statistics();
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Strings MRU Size", Instance._hugeMruSize, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Strings (>=" + _hugeMruThreshold + " chars) Held", held.Key, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Huge Estimated Bytes Held", held.Value * 2, "bytes");
Console.WriteLine("|---------------------------------------------------------------------------------|");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Strings MRU Size", GinormousSize, "strings");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous (>=" + _ginormousThreshold + " chars) Hits", _ginormousHits, "hits");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Misses", _ginormousMisses, "misses");
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Ginormous Chars Saved", _ginormousCharsSaved, "chars");
Console.WriteLine("|---------------------------------------------------------------------------------|");
// There's no point in reporting the ginormous string because it will have evaporated by now.
Console.WriteLine("||{0,50}|{1,20:N0}|{2,8}|", "Time Spent Interning", _stopwatch.ElapsedMilliseconds, "ms");
Console.WriteLine("{0}{0}", new string('=', 41));
IEnumerable<string> topMissingString =
_missedStrings
.OrderByDescending(kv => kv.Value * kv.Key.Length)
.Take(15)
.Where(kv => kv.Value > 1)
.Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars = {3}KB wasted)\n{0}", kv.Key, kv.Value, kv.Key.Length, (kv.Value - 1) * kv.Key.Length * 2 / 1024));
Console.WriteLine("##########Top Missed Strings: \n{0} ", string.Join("\n==============\n", topMissingString.ToArray()));
Console.WriteLine();
IEnumerable<string> topRejectedString =
_rejectedStrings
.OrderByDescending(kv => kv.Value * kv.Key.Length)
.Take(15)
.Where(kv => kv.Value > 1)
.Select(kv => string.Format(CultureInfo.InvariantCulture, "({1} instances x each {2} chars = {3}KB wasted)\n{0}", kv.Key, kv.Value, kv.Key.Length, (kv.Value - 1) * kv.Key.Length * 2 / 1024));
Console.WriteLine("##########Top Rejected Strings: \n{0} ", string.Join("\n==============\n", topRejectedString.ToArray()));
}
/// <summary>
/// Try to intern the string.
/// Return true if an interned value could be returned.
/// Return false if it was added to the intern list, but wasn't there already.
/// Return null if it didn't meet the length criteria for any of the buckets. Interning was rejected
/// </summary>
private bool? TryIntern<T>(T candidate, out string interned) where T : IInternable
{
int length = candidate.Length;
interned = null;
// First, try the hard coded intern strings.
// Each of the hard-coded small strings below showed up in a profile run with considerable duplication in memory.
if (!_dontTrack)
{
bool? hardcodedMatchResult = TryMatchHardcodedStrings(candidate, out interned);
if (hardcodedMatchResult != false)
{
// Either matched a hardcoded string or is explicitly not to be interned.
return hardcodedMatchResult;
}
if (length > _ginormousThreshold)
{
lock (_ginormous)
{
LinkedListNode<WeakReference> current = _ginormous.First;
while (current != null)
{
if (current.Value.Target is string last && last.Length == candidate.Length && candidate.StartsWithStringByOrdinalComparison(last))
{
interned = last;
_ginormousHits++;
_ginormousCharsSaved += last.Length;
_ginormous.Remove(current);
_ginormous.AddFirst(current);
return true;
}
current = current.Next;
}
_ginormousMisses++;
interned = candidate.ExpensiveConvertToString();
LinkedListNode<WeakReference> lastNode = _ginormous.Last;
_ginormous.RemoveLast();
_ginormous.AddFirst(lastNode);
lastNode.Value.Target = interned;
return false;
}
}
#if !CLR2COMPATIBILITY
else if (_useSimpleConcurrency)
{
var stringified = candidate.ExpensiveConvertToString();
interned = _internedStrings.GetOrAdd(stringified, stringified);
return true;
}
#endif
else if (length >= _hugeMruThreshold)
{
lock (_hugeMru)
{
return _hugeMru.TryGet(candidate, out interned);
}
}
else if (length >= _largeMruThreshold)
{
lock (_largeMru)
{
return _largeMru.TryGet(candidate, out interned);
}
}
else if (length >= _smallMruThreshold)
{
lock (_smallMru)
{
return _smallMru.TryGet(candidate, out interned);
}
}
}
interned = candidate.ExpensiveConvertToString();
return null;
}
/// <summary>
/// Version of Intern that gathers statistics
/// </summary>
private string InternWithStatistics<T>(T candidate) where T : IInternable
{
lock (_missedStrings)
{
_stopwatch.Start();
bool? interned = TryIntern(candidate, out string result);
_stopwatch.Stop();
if (interned.HasValue && !interned.Value)
{
// Could not intern.
_internMisses++;
_missedStrings.TryGetValue(result, out int priorCount);
_missedStrings[result] = priorCount + 1;
return result;
}
else if (interned == null)
{
// Decided not to attempt interning
_internRejects++;
_rejectedStrings.TryGetValue(result, out int priorCount);
_rejectedStrings[result] = priorCount + 1;
return result;
}
_internHits++;
if (!candidate.ReferenceEquals(result))
{
// Reference changed so 'candidate' is now released and should save memory.
_internEliminatedStrings++;
_internEliminatedChars += candidate.Length;
}
return result;
}
}
/// <summary>
/// A singly linked list of strings where the most recently accessed string is at the top.
/// Size expands up to a fixed number of strings.
/// </summary>
private class PrioritizedStringList
{
/// <summary>
/// Maximum size of the mru list.
/// </summary>
private readonly int _size;
/// <summary>
/// Head of the mru list.
/// </summary>
private Node _mru;
/// <summary>
/// Construct an Mru list with a fixed maximum size.
/// </summary>
internal PrioritizedStringList(int size)
{
_size = size;
}
/// <summary>
/// Try to get one element from the list. Upon leaving the function 'candidate' will be at the head of the Mru list.
/// This function is not thread-safe.
/// </summary>
internal bool TryGet<T>(T candidate, out string interned) where T : IInternable
{
if (_size == 0)
{
interned = candidate.ExpensiveConvertToString();
return false;
}
int length = candidate.Length;
Node secondPrior = null;
Node prior = null;
Node head = _mru;
bool found = false;
int itemCount = 0;
while (head != null && !found)
{
if (head.Value.Length == length)
{
if (candidate.StartsWithStringByOrdinalComparison(head.Value))
{
found = true;
}
}
if (!found)
{
secondPrior = prior;
prior = head;
head = head.Next;
}