-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
AveragedPerceptron.cs
237 lines (210 loc) · 13.2 KB
/
AveragedPerceptron.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using Microsoft.ML;
using Microsoft.ML.Calibrators;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Model;
using Microsoft.ML.Numeric;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers;
using static Microsoft.ML.Trainers.AveragedLinearOptions;
[assembly: LoadableClass(AveragedPerceptronTrainer.Summary, typeof(AveragedPerceptronTrainer), typeof(AveragedPerceptronTrainer.Options),
new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) },
AveragedPerceptronTrainer.UserNameValue,
AveragedPerceptronTrainer.LoadNameValue, "avgper", AveragedPerceptronTrainer.ShortName)]
[assembly: LoadableClass(typeof(void), typeof(AveragedPerceptronTrainer), null, typeof(SignatureEntryPointModule), "AP")]
namespace Microsoft.ML.Trainers
{
/// <summary>
/// The <see cref="IEstimator{TTransformer}"/> to predict a target using a linear binary classification model trained with the averaged perceptron.
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [AveragedPerceptron](xref:Microsoft.ML.StandardTrainersCatalog.AveragedPerceptron(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,Microsoft.ML.Trainers.IClassificationLoss,System.Single,System.Boolean,System.Single,System.Int32))
/// or [AveragedPerceptron(Options)](xref:Microsoft.ML.StandardTrainersCatalog.AveragedPerceptron(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.AveragedPerceptronTrainer.Options)).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification-no-prob.md)]
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Binary classification |
/// | Is normalization required? | Yes |
/// | Is caching required? | No |
/// | Required NuGet in addition to Microsoft.ML | None |
/// | Exportable to ONNX | Yes |
///
/// ### Training Algorithm Details
/// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
/// For instance, with feature values $f_0, f_1,..., f_{D-1}$, the prediction is given by determining what side of the hyperplane the point falls into.
/// That is the same as the sign of the features' weighted sum, i.e. $\sum_{i = 0}^{D-1} (w_i * f_i) + b$, where $w_0, w_1,..., w_{D-1}$
/// are the weights computed by the algorithm, and $b$ is the bias computed by the algorithm.
///
/// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
/// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features is computed.
/// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
/// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example,
/// multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
/// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
///
/// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above.
/// The final prediction is then calculated by averaging the weighted sum from each weight vector and looking at the sign of the result.
///
/// For more information see [Wikipedia entry for Perceptron](https://en.wikipedia.org/wiki/Perceptron)
/// or [Large Margin Classification Using the Perceptron Algorithm](https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200).
///
/// Check the See Also section for links to usage examples.
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="StandardTrainersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, IClassificationLoss, float, bool, float, int)" />
/// <seealso cref="StandardTrainersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, AveragedPerceptronTrainer.Options)"/>
/// <seealso cref="Options"/>
public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
{
// Names under which this trainer is registered with the component catalog via the
// LoadableClass assembly attributes above. These strings are part of the persisted
// model / command-line contract; do not change them.
internal const string LoadNameValue = "AveragedPerceptron";
internal const string UserNameValue = "Averaged Perceptron";
internal const string ShortName = "ap";
internal const string Summary = "Averaged Perceptron Binary Classifier.";
// Options captured at construction; consulted when resolving the loss function
// (see the Options-based constructor) and by the TrainBinary entry point.
private readonly Options _args;
// Hides the base AveragedDefault value with `new` so that averaged perceptron
// trains for 10 iterations by default.
internal class AveragedPerceptronDefault : AveragedDefault
{
public new const int NumberOfIterations = 10;
}
/// <summary>
/// Options for the <see cref="AveragedPerceptronTrainer"/> as used in
/// <see cref="Microsoft.ML.StandardTrainersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, Options)"/>.
/// </summary>
public sealed class Options : AveragedLinearOptions
{
public Options()
{
// Apply this trainer's own default iteration count (10) instead of the
// base AveragedLinearOptions default.
NumberOfIterations = AveragedPerceptronDefault.NumberOfIterations;
}
/// <summary>
/// A custom <a href="https://en.wikipedia.org/wiki/Loss_function">loss</a>.
/// Command-line/entry-point facing factory, defaulting to hinge loss; ignored when
/// <see cref="LossFunction"/> is set directly (see the trainer constructor).
/// </summary>
[Argument(ArgumentType.Multiple, Name = "LossFunction", HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
internal ISupportClassificationLossFactory ClassificationLossFunctionFactory = new HingeLoss.Options();
/// <summary>
/// A custom <a href="https://en.wikipedia.org/wiki/Loss_function">loss</a>.
/// When non-null, takes precedence over <see cref="ClassificationLossFunctionFactory"/>.
/// </summary>
public IClassificationLoss LossFunction { get; set; }
/// <summary>
/// The <a href="https://en.wikipedia.org/wiki/Calibration_(statistics)">calibrator</a> for producing probabilities. Default is exponential (aka Platt) calibration.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
internal ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory();
/// <summary>
/// The maximum number of examples to use when training the calibrator.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of examples to use when training the calibrator", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
internal int MaxCalibrationExamples = 1000000;
// Exposes the classification loss factory to the base trainer under the
// scalar-loss interface it expects.
internal override IComponentFactory<IScalarLoss> LossFunctionFactory => ClassificationLossFunctionFactory;
}
// Per-run training state; the averaged-weights bookkeeping (Weights, TotalWeights,
// Bias, TotalBias, NumWeightUpdates, WeightsScale) comes from the base class.
private sealed class TrainState : AveragedTrainStateBase
{
public TrainState(IChannel ch, int numFeatures, LinearModelParameters predictor, AveragedPerceptronTrainer parent)
: base(ch, numFeatures, predictor, parent)
{
}
// Builds the final linear model. With averaging enabled, the returned weights and
// bias are the accumulated totals divided by the number of weight updates;
// otherwise the last weight vector and bias are used as-is.
public override LinearBinaryModelParameters CreatePredictor()
{
// Caller must have folded any pending scaling into Weights already.
Contracts.Assert(WeightsScale == 1);
VBuffer<float> weights = default;
float bias;
if (!Averaged)
{
Weights.CopyTo(ref weights);
bias = Bias;
}
else
{
TotalWeights.CopyTo(ref weights);
// Average: divide the running totals by the number of updates.
VectorUtils.ScaleBy(ref weights, 1 / (float)NumWeightUpdates);
bias = TotalBias / (float)NumWeightUpdates;
}
return new LinearBinaryModelParameters(ParentHost, in weights, bias);
}
}
// Options-based constructor, used by the catalog extension methods and the entry point.
internal AveragedPerceptronTrainer(IHostEnvironment env, Options options)
: base(options, env, UserNameValue, TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName))
{
_args = options;
// An explicitly supplied LossFunction wins over the factory (hinge loss by default).
LossFunction = _args.LossFunction ?? _args.LossFunctionFactory.CreateComponent(env);
}
/// <summary>
/// Trains a linear binary classifier using the averaged perceptron.
/// <a href='https://en.wikipedia.org/wiki/Perceptron'>Wikipedia entry for Perceptron</a>
/// </summary>
/// <param name="env">The local instance of the <see cref="IHostEnvironment"/></param>
/// <param name="labelColumnName">The name of the label column. </param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="lossFunction">The classification loss function. </param>
/// <param name="learningRate">The learning rate. </param>
/// <param name="decreaseLearningRate">Whether to decrease learning rate as iterations progress.</param>
/// <param name="l2Regularization">Weight of L2 regularization term.</param>
/// <param name="numberOfIterations">The number of training iterations.</param>
internal AveragedPerceptronTrainer(IHostEnvironment env,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
IClassificationLoss lossFunction = null,
float learningRate = AveragedPerceptronDefault.LearningRate,
bool decreaseLearningRate = AveragedPerceptronDefault.DecreaseLearningRate,
float l2Regularization = AveragedPerceptronDefault.L2Regularization,
int numberOfIterations = AveragedPerceptronDefault.NumberOfIterations)
: this(env, new Options
{
LabelColumnName = labelColumnName,
FeatureColumnName = featureColumnName,
LearningRate = learningRate,
DecreaseLearningRate = decreaseLearningRate,
L2Regularization = l2Regularization,
NumberOfIterations = numberOfIterations,
LossFunction = lossFunction
})
{
}
private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification;
// Request calibration so downstream components can map raw scores to probabilities.
private protected override bool NeedCalibration => true;
// Output columns produced by the trained model: a float Score and a bool PredictedLabel.
private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema)
{
return new[]
{
// REVIEW AP is currently not calibrating. Add the probability column after fixing the behavior.
new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false, new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation())),
new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false, new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation()))
};
}
// Validates that the label column of the training data is a binary label.
private protected override void CheckLabels(RoleMappedData data)
{
Contracts.AssertValue(data);
data.CheckBinaryLabel();
}
// Creates the mutable per-run training state consumed by the base training loop.
private protected override TrainStateBase MakeState(IChannel ch, int numFeatures, LinearModelParameters predictor)
{
return new TrainState(ch, numFeatures, predictor, this);
}
// Wraps the trained model parameters into a prediction transformer over the training schema.
private protected override BinaryPredictionTransformer<LinearBinaryModelParameters> MakeTransformer(LinearBinaryModelParameters model, DataViewSchema trainSchema)
=> new BinaryPredictionTransformer<LinearBinaryModelParameters>(Host, model, trainSchema, FeatureColumn.Name);
// Entry point exposed to the experiment/GUI pipeline as "Trainers.AveragedPerceptronBinaryClassifier".
[TlcModule.EntryPoint(Name = "Trainers.AveragedPerceptronBinaryClassifier",
Desc = Summary,
UserName = UserNameValue,
ShortName = ShortName)]
internal static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Options input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("TrainAP");
host.CheckValue(input, nameof(input));
EntryPointUtils.CheckInputArgs(host, input);
// Delegates training (and post-hoc calibration per the entry-point options) to the shared helper.
return TrainerEntryPointsUtils.Train<Options, CommonOutputs.BinaryClassificationOutput>(host, input,
() => new AveragedPerceptronTrainer(host, input),
() => TrainerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumnName),
calibrator: input.Calibrator, maxCalibrationExamples: input.MaxCalibrationExamples);
}
}
}