Merge official main
* remotes/official/main:
  Update lgbm to v2.3.1 (dotnet#5851)
  Speed-up bitmap operations on images. Fixes dotnet#5856 (dotnet#5857)
  Onnx recursion limit (dotnet#5840)
  Speed up the inference of the saved_model(s). Fixes dotnet#5847 (dotnet#5848)

Signed-off-by: darth-vader-lg <luigi.generale@gmail.com>
darth-vader-lg committed Jun 26, 2021
2 parents 6aa56a3 + 1b3cb77 commit f11c475
Showing 7 changed files with 115 additions and 28 deletions.
2 changes: 1 addition & 1 deletion eng/Versions.props
@@ -21,7 +21,7 @@
<SystemThreadingChannelsPackageVersion>4.7.1</SystemThreadingChannelsPackageVersion>
<!-- Other/External dependencies -->
<GoogleProtobufPackageVersion>3.10.1</GoogleProtobufPackageVersion>
<LightGBMPackageVersion>2.2.3</LightGBMPackageVersion>
<LightGBMPackageVersion>2.3.1</LightGBMPackageVersion>
<MicrosoftExtensionsPackageVersion>2.1.0</MicrosoftExtensionsPackageVersion>
<MicrosoftMLOnnxRuntimePackageVersion>1.6.0</MicrosoftMLOnnxRuntimePackageVersion>
<MlNetMklDepsPackageVersion>0.0.0.9</MlNetMklDepsPackageVersion>
33 changes: 30 additions & 3 deletions src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs
@@ -53,7 +53,7 @@ public static class OnnxCatalog
/// <param name="modelFile">The path of the file containing the ONNX model.</param>
/// <param name="shapeDictionary">ONNX shapes to be used over those loaded from <paramref name="modelFile"/>.
/// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter
/// is particullarly useful for working with variable dimension inputs and outputs.
/// is particularly useful for working with variable dimension inputs and outputs.
/// </param>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
@@ -110,7 +110,7 @@ public static class OnnxCatalog
/// <param name="modelFile">The path of the file containing the ONNX model.</param>
/// <param name="shapeDictionary">ONNX shapes to be used over those loaded from <paramref name="modelFile"/>.
/// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter
/// is particullarly useful for working with variable dimension inputs and outputs.
/// is particularly useful for working with variable dimension inputs and outputs.
/// </param>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
@@ -162,7 +162,7 @@ public static class OnnxCatalog
/// <param name="modelFile">The path of the file containing the ONNX model.</param>
/// <param name="shapeDictionary">ONNX shapes to be used over those loaded from <paramref name="modelFile"/>.
/// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter
/// is particullarly useful for working with variable dimension inputs and outputs.
/// is particularly useful for working with variable dimension inputs and outputs.
/// </param>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
@@ -176,6 +176,33 @@ public static class OnnxCatalog
=> new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames,
modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary: shapeDictionary);

/// <summary>
/// Create a <see cref="OnnxScoringEstimator"/>, which applies a pre-trained Onnx model to the <paramref name="inputColumnNames"/> columns.
/// Please refer to <see cref="OnnxScoringEstimator"/> to learn more about the necessary dependencies,
/// and how to run it on a GPU.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnNames">The output columns resulting from the transformation.</param>
/// <param name="inputColumnNames">The input columns.</param>
/// <param name="modelFile">The path of the file containing the ONNX model.</param>
/// <param name="shapeDictionary">ONNX shapes to be used over those loaded from <paramref name="modelFile"/>.
/// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter
/// is particularly useful for working with variable dimension inputs and outputs.
/// </param>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog,
string[] outputColumnNames,
string[] inputColumnNames,
string modelFile,
IDictionary<string, int[]> shapeDictionary,
int? gpuDeviceId = null,
bool fallbackToCpu = false,
int recursionLimit = 100)
=> new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames,
modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary: shapeDictionary, recursionLimit);

/// <summary>
/// Create <see cref="DnnImageFeaturizerEstimator"/>, which applies one of the pre-trained DNN models in
/// <see cref="DnnImageModelSelector"/> to featurize an image.
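For context, a minimal sketch of how the new ApplyOnnxModel overload above might be invoked. This snippet is not part of the commit; the model path, column names, shape, and limit value are hypothetical placeholders.

using System.Collections.Generic;
using Microsoft.ML;

var mlContext = new MLContext();
// Apply an ONNX model whose graph nests more deeply than Protobuf's default limit of 100.
var pipeline = mlContext.Transforms.ApplyOnnxModel(
    outputColumnNames: new[] { "output" },            // hypothetical output column
    inputColumnNames: new[] { "input" },              // hypothetical input column
    modelFile: "model.onnx",                          // hypothetical model path
    shapeDictionary: new Dictionary<string, int[]> { { "input", new[] { 1, 3, 224, 224 } } },
    gpuDeviceId: null,                                // null runs on CPU
    fallbackToCpu: false,
    recursionLimit: 150);                             // raise the Protobuf recursion limit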
57 changes: 44 additions & 13 deletions src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs
@@ -87,6 +87,9 @@ internal sealed class Options : TransformInputBase

[Argument(ArgumentType.Multiple, HelpText = "Shapes used to overwrite shapes loaded from ONNX file.", SortOrder = 5)]
public CustomShapeInfo[] CustomShapeInfos;

[Argument(ArgumentType.AtMostOnce, HelpText = "Protobuf CodedInputStream recursion limit.", SortOrder = 6)]
public int RecursionLimit = 100;
}

/// <summary>
@@ -126,8 +129,9 @@ private static VersionInfo GetVersionInfo()
modelSignature: "ONNXSCOR",
// version 10001 is single input & output.
// version 10002 = multiple inputs & outputs
verWrittenCur: 0x00010002,
verReadableCur: 0x00010002,
// version 10003 = custom protobuf recursion limit
verWrittenCur: 0x00010003,
verReadableCur: 0x00010003,
verWeCanReadBack: 0x00010001,
loaderSignature: LoaderSignature,
loaderAssemblyName: typeof(OnnxTransformer).Assembly.FullName);
@@ -184,7 +188,26 @@ private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx
}
}

var options = new Options() { InputColumns = inputs, OutputColumns = outputs, CustomShapeInfos = loadedCustomShapeInfos };
int recursionLimit;

// Models saved with version 0x00010003 or later persist the Protobuf recursion limit
if (ctx.Header.ModelVerWritten >= 0x00010003)
{
recursionLimit = ctx.Reader.ReadInt32();
}
else
{
// Default for transformers saved before the recursion limit was persisted
recursionLimit = 100;
}

var options = new Options()
{
InputColumns = inputs,
OutputColumns = outputs,
CustomShapeInfos = loadedCustomShapeInfos,
RecursionLimit = recursionLimit
};

return new OnnxTransformer(env, options, modelBytes);
}
@@ -221,13 +244,13 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile));
Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exist.", options.ModelFile);
// Because we cannot delete the user file, ownModelFile should be false.
Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary);
Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit);
}
else
{
// Entering this region means that the byte[] is passed as the model. To feed that byte[] to ONNXRuntime, we need
// to create a temporary file to store it and then call ONNXRuntime's API to load that file.
Model = OnnxModel.CreateFromBytes(modelBytes, env, options.GpuDeviceId, options.FallbackToCpu, shapeDictionary: shapeDictionary);
Model = OnnxModel.CreateFromBytes(modelBytes, env, options.GpuDeviceId, options.FallbackToCpu, shapeDictionary: shapeDictionary, options.RecursionLimit);
}
}
catch (OnnxRuntimeException e)
@@ -258,16 +281,18 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on. Null for CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
/// <param name="shapeDictionary"></param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
internal OnnxTransformer(IHostEnvironment env, string modelFile, int? gpuDeviceId = null,
bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null)
bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
: this(env, new Options()
{
ModelFile = modelFile,
InputColumns = new string[] { },
OutputColumns = new string[] { },
GpuDeviceId = gpuDeviceId,
FallbackToCpu = fallbackToCpu,
CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray()
CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(),
RecursionLimit = recursionLimit
})
{
}
@@ -283,16 +308,18 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on. Null for CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
/// <param name="shapeDictionary"></param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
IDictionary<string, int[]> shapeDictionary = null)
IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
: this(env, new Options()
{
ModelFile = modelFile,
InputColumns = inputColumnNames,
OutputColumns = outputColumnNames,
GpuDeviceId = gpuDeviceId,
FallbackToCpu = fallbackToCpu,
CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray()
CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(),
RecursionLimit = recursionLimit
})
{
}
Expand Down Expand Up @@ -325,6 +352,8 @@ private protected override void SaveModel(ModelSaveContext ctx)
ctx.SaveNonEmptyString(info.Name);
ctx.Writer.WriteIntArray(info.Shape);
}

ctx.Writer.Write(_options.RecursionLimit);
}

private protected override IRowMapper MakeRowMapper(DataViewSchema inputSchema) => new Mapper(this, inputSchema);
@@ -807,10 +836,11 @@ public sealed class OnnxScoringEstimator : TrivialEstimator<OnnxTransformer>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on. Null for CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
/// <param name="shapeDictionary"></param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
[BestFriend]
internal OnnxScoringEstimator(IHostEnvironment env, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
IDictionary<string, int[]> shapeDictionary = null)
: this(env, new OnnxTransformer(env, new string[] { }, new string[] { }, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary))
IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
: this(env, new OnnxTransformer(env, new string[] { }, new string[] { }, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit))
{
}

@@ -825,9 +855,10 @@ public sealed class OnnxScoringEstimator : TrivialEstimator<OnnxTransformer>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on. Null for CPU.</param>
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
/// <param name="shapeDictionary"></param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
internal OnnxScoringEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile,
int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null)
: this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary))
int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
: this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit))
{
}

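To summarize the serialization change in this file: SaveModel appends the recursion limit after the custom shape infos, and the loader reads it back only when the saved transformer's version is at least 0x00010003, otherwise falling back to the default of 100. A simplified sketch of that version-gated round-trip, using plain BinaryWriter/BinaryReader as stand-ins for ML.NET's ModelSaveContext/ModelLoadContext:

using System.IO;

const int VerWithRecursionLimit = 0x00010003;

static void SaveRecursionLimit(BinaryWriter writer, int recursionLimit)
{
    // Earlier fields (columns, custom shapes) are written first; the new field is
    // appended at the end, so the layout read by older code is unchanged.
    writer.Write(recursionLimit);
}

static int LoadRecursionLimit(BinaryReader reader, int modelVerWritten)
{
    // Transformers saved before version 0x00010003 do not contain the field.
    return modelVerWritten >= VerWithRecursionLimit ? reader.ReadInt32() : 100;
}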
14 changes: 8 additions & 6 deletions src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs
@@ -164,8 +164,9 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da
/// <param name="ownModelFile">If true, the <paramref name="modelFile"/> will be deleted when <see cref="OnnxModel"/> is
/// no longer needed.</param>
/// <param name="shapeDictionary"></param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
bool ownModelFile=false, IDictionary<string, int[]> shapeDictionary = null)
bool ownModelFile=false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
{
// If we don't own the model file, _disposed should be false to prevent deleting user's file.
_disposed = false;
@@ -204,7 +205,7 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da

// The CodedInputStream auto closes the stream, and we need to make sure that our main stream stays open, so creating a new one here.
using (var modelStream = new FileStream(modelFile, FileMode.Open, FileAccess.Read, FileShare.Delete | FileShare.Read))
using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, 100))
using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, recursionLimit))
model = OnnxCSharpToProtoWrapper.ModelProto.Parser.ParseFrom(codedStream);

// Parse actual input and output types stored in the loaded ONNX model to get their DataViewType's.
@@ -321,7 +322,7 @@ private static bool CheckOnnxShapeCompatibility(IEnumerable<int> left, IEnumerab

/// <summary>
/// Create an OnnxModel from a byte[]. Usually, an ONNX model is consumed by <see cref="OnnxModel"/> as a file.
/// With <see cref="CreateFromBytes(byte[], IHostEnvironment)"/> and <see cref="CreateFromBytes(byte[], IHostEnvironment, int?, bool, IDictionary{string, int[]})"/>,
/// With <see cref="CreateFromBytes(byte[], IHostEnvironment)"/> and <see cref="CreateFromBytes(byte[], IHostEnvironment, int?, bool, IDictionary{string, int[]}, int)"/>,
/// it's possible to use in-memory model (type: byte[]) to create <see cref="OnnxModel"/>.
/// </summary>
/// <param name="modelBytes">Bytes of the serialized model</param>
@@ -335,7 +336,7 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env)
/// Create an OnnxModel from a byte[]. Set execution to GPU if required.
/// Usually, an ONNX model is consumed by <see cref="OnnxModel"/> as a file.
/// With <see cref="CreateFromBytes(byte[], IHostEnvironment)"/> and
/// <see cref="CreateFromBytes(byte[], IHostEnvironment, int?, bool, IDictionary{string, int[]})"/>,
/// <see cref="CreateFromBytes(byte[], IHostEnvironment, int?, bool, IDictionary{string, int[]}, int)"/>,
/// it's possible to use in-memory model (type: byte[]) to create <see cref="OnnxModel"/>.
/// </summary>
/// <param name="modelBytes">Bytes of the serialized model.</param>
@@ -345,14 +346,15 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env)
/// <param name="shapeDictionary">User-provided shapes. If the key "myTensorName" is associated
/// with the value [1, 3, 5], the shape of "myTensorName" will be set to [1, 3, 5].
/// The shape loaded from <paramref name="modelBytes"/> would be overwritten.</param>
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
/// <returns>An <see cref="OnnxModel"/></returns>
public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env, int? gpuDeviceId = null, bool fallbackToCpu = false,
IDictionary<string, int[]> shapeDictionary = null)
IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
{
var tempModelFile = Path.Combine(((IHostEnvironmentInternal)env).TempFilePath, Path.GetRandomFileName());
File.WriteAllBytes(tempModelFile, modelBytes);
return new OnnxModel(tempModelFile, gpuDeviceId, fallbackToCpu,
ownModelFile: true, shapeDictionary: shapeDictionary);
ownModelFile: true, shapeDictionary: shapeDictionary, recursionLimit);
}

/// <summary>
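For context on the parameter threaded through this file: Google.Protobuf's C# parser enforces a message-nesting limit (100 by default) to guard against stack overflows, so ONNX models with deeply nested graphs fail to parse unless the limit is raised. A minimal sketch of the parsing pattern used above; the file name and the limit of 150 are hypothetical, and ModelProto refers to ML.NET's internal OnnxCSharpToProtoWrapper type rather than a public API.

using System.IO;
using Google.Protobuf;

// Parse an ONNX model with an explicit Protobuf recursion limit.
using (var modelStream = File.OpenRead("model.onnx"))
using (var codedStream = CodedInputStream.CreateWithLimits(modelStream, int.MaxValue, 150))
{
    // Parsing throws InvalidProtocolBufferException if the message nests
    // deeper than the configured recursion limit.
    var model = OnnxCSharpToProtoWrapper.ModelProto.Parser.ParseFrom(codedStream);
}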
