Expose the Onnx runtime option for setting the number of threads #5962

Merged · 5 commits · Oct 12, 2021
10 changes: 7 additions & 3 deletions src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs
@@ -83,6 +83,8 @@ public static class OnnxCatalog
         /// <param name="modelFile">The path of the file containing the ONNX model.</param>
         /// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
         /// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
+        /// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
+        /// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -95,9 +97,11 @@ public static class OnnxCatalog
             string inputColumnName,
             string modelFile,
             int? gpuDeviceId = null,
-            bool fallbackToCpu = false)
-            => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), new[] { outputColumnName }, new[] { inputColumnName },
-                modelFile, gpuDeviceId, fallbackToCpu);
+            bool fallbackToCpu = false,
+            int? interOpNumThreads = default,
+            int? intraOpNumThreads = default)
+            => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), new[] { outputColumnName }, new[] { inputColumnName },

Member:

This is where the API compat is failing. You will either need to make a new method here, or make an onnx options class that encapsulates these.

@eerhardt do you think an onnx options class would be better?

Member:

See #1798. The established pattern is:

  1. A "simple" public API that takes all the required arguments.
  2. An "advanced" overload that takes the required arguments plus an Options class.

And if (1) is sufficient, we can opt out of (2).

So, yes, I believe the new options should force us to create the Options class here.

+                modelFile, gpuDeviceId, fallbackToCpu, interOpNumThreads: interOpNumThreads, intraOpNumThreads: intraOpNumThreads);
 
         /// <summary>
         /// Create a <see cref="OnnxScoringEstimator"/>, which applies a pre-trained Onnx model to the <paramref name="inputColumnName"/> column.
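To make the review discussion above concrete: following the simple/advanced pattern cited from #1798, an options-based overload could look roughly like the sketch below. OnnxOptions and all of its members are hypothetical illustrations of that pattern, not the API merged in this PR.

// Hypothetical "advanced" options class, per the simple/advanced pattern
// discussed in the review thread. Names are illustrative only.
public sealed class OnnxOptions
{
    public string ModelFile;          // required
    public int? GpuDeviceId;          // null => run on CPU
    public bool FallbackToCpu;
    public int? InterOpNumThreads;    // null => let ONNX Runtime decide
    public int? IntraOpNumThreads;    // null => let ONNX Runtime decide
}

// Hypothetical "advanced" overload: the required arguments plus the options
// class, so the existing "simple" overload's signature stays stable for API compat.
public static OnnxScoringEstimator ApplyOnnxModel(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName,
    OnnxOptions options)
    => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog),
        new[] { outputColumnName }, new[] { inputColumnName },
        options.ModelFile, options.GpuDeviceId, options.FallbackToCpu,
        interOpNumThreads: options.InterOpNumThreads,
        intraOpNumThreads: options.IntraOpNumThreads);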
25 changes: 20 additions & 5 deletions src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs
@@ -90,6 +90,12 @@ internal sealed class Options : TransformInputBase
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Protobuf CodedInputStream recursion limit.", SortOrder = 6)]
             public int RecursionLimit = 100;
+
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Controls the number of threads used to parallelize the execution of the graph (across nodes).", SortOrder = 7)]
+            public int? InterOpNumThreads = null;
+
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Controls the number of threads to use to run the model.", SortOrder = 8)]
+            public int? IntraOpNumThreads = null;
         }
 
         /// <summary>
@@ -244,7 +250,8 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
                 Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile));
                 Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exist.", options.ModelFile);
                 // Because we cannot delete the user file, ownModelFile should be false.
-                Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit);
+                Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit,
+                    options.InterOpNumThreads, options.IntraOpNumThreads);
             }
             else
             {
@@ -309,8 +316,11 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
         /// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
         /// <param name="shapeDictionary"></param>
         /// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
+        /// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
+        /// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
         internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
-            IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
+            IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
+            int? interOpNumThreads = null, int? intraOpNumThreads = null)
             : this(env, new Options()
             {
                 ModelFile = modelFile,
@@ -319,7 +329,9 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
                 GpuDeviceId = gpuDeviceId,
                 FallbackToCpu = fallbackToCpu,
                 CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(),
-                RecursionLimit = recursionLimit
+                RecursionLimit = recursionLimit,
+                InterOpNumThreads = interOpNumThreads,
+                IntraOpNumThreads = intraOpNumThreads
             })
         {
         }
@@ -856,9 +868,12 @@ public sealed class OnnxScoringEstimator : TrivialEstimator<OnnxTransformer>
         /// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
         /// <param name="shapeDictionary"></param>
         /// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
+        /// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
+        /// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
         internal OnnxScoringEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile,
-            int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
-            : this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit))
+            int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
+            int? interOpNumThreads = null, int? intraOpNumThreads = null)
+            : this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit, interOpNumThreads, intraOpNumThreads))
         {
         }
 
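For context, the two new knobs map one-to-one onto ONNX Runtime's SessionOptions properties, as the OnnxUtils.cs changes below show. A minimal standalone sketch against the ONNX Runtime C# API follows; the model path and thread counts are illustrative, and 0 (the property default) leaves the choice to the runtime.

using Microsoft.ML.OnnxRuntime;

// Sketch: configure ONNX Runtime threading directly, mirroring what the
// transform now does internally.
var sessionOptions = new SessionOptions
{
    // Threads used to run independent graph nodes in parallel (across nodes).
    InterOpNumThreads = 1,
    // Threads used inside a single node's kernel, e.g. a large MatMul.
    IntraOpNumThreads = 4
};

// Illustrative path; any valid ONNX model works here.
using var session = new InferenceSession("model.onnx", sessionOptions);

In short, inter-op threads parallelize across independent nodes of the graph, while intra-op threads parallelize the computation within a single node.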
21 changes: 18 additions & 3 deletions src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs
@@ -165,8 +165,11 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da
         /// no longer needed.</param>
         /// <param name="shapeDictionary"></param>
         /// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
+        /// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
+        /// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
         public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
-            bool ownModelFile = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
+            bool ownModelFile = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
+            int? interOpNumThreads = null, int? intraOpNumThreads = null)
ericstj marked this conversation as resolved.
         {
             // If we don't own the model file, _disposed should be false to prevent deleting user's file.
             _disposed = false;
@@ -181,15 +184,27 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da
                 catch (OnnxRuntimeException)
                 {
                     if (fallbackToCpu)
-                        _session = new InferenceSession(modelFile);
+                    {
+                        var sessionOptions = new SessionOptions()
+                        {
+                            InterOpNumThreads = interOpNumThreads.GetValueOrDefault(),
+                            IntraOpNumThreads = intraOpNumThreads.GetValueOrDefault()
+                        };
+                        _session = new InferenceSession(modelFile, sessionOptions);
+                    }
                     else
                         // If called from OnnxTransform, the exception is caught and rethrown
                         throw;
                 }
             }
             else
             {
-                _session = new InferenceSession(modelFile);
+                var sessionOptions = new SessionOptions()
+                {
+                    InterOpNumThreads = interOpNumThreads.GetValueOrDefault(),
+                    IntraOpNumThreads = intraOpNumThreads.GetValueOrDefault()
+                };
+                _session = new InferenceSession(modelFile, sessionOptions);
             }
 
             try
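Note the GetValueOrDefault() calls above: a null parameter becomes 0, which ONNX Runtime treats as "choose a default thread count," so callers who omit the new arguments keep the previous behavior. As a closing sketch, this is how a pipeline author might exercise the new overload; the column names and model path are illustrative, not taken from the PR.

using Microsoft.ML;

var mlContext = new MLContext();

// Cap ONNX Runtime at one thread per scope, e.g. to avoid oversubscription
// when many scoring pipelines run side by side on the same machine.
var pipeline = mlContext.Transforms.ApplyOnnxModel(
    outputColumnName: "Score",
    inputColumnName: "Features",
    modelFile: "model.onnx",
    gpuDeviceId: null,
    fallbackToCpu: false,
    interOpNumThreads: 1,
    intraOpNumThreads: 1);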