Proper Python worker reuse for test_model_cache. (#3863)
* Proper Python worker reuse for test_model_cache.

Signed-off-by: Joshua Anickat <joanicka@microsoft.com>

* try range

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove comment

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove blankline

Signed-off-by: harupy <hkawamura0130@gmail.com>

Co-authored-by: Joshua Anickat <joanicka@microsoft.com>
Co-authored-by: harupy <hkawamura0130@gmail.com>
3 people committed Dec 7, 2021
1 parent 66820dd commit 587bc8e
10 changes: 4 additions & 6 deletions tests/pyfunc/test_spark.py
@@ -263,10 +263,8 @@ def get_model(_):
         # Note that we can't necessarily expect an even split, or even that there were only
         # exactly 2 python processes launched, due to Spark and its mysterious ways, but we do
         # expect significant reuse.
-        results = spark.sparkContext.parallelize(range(0, 100), 30).map(get_model).collect()
-
-        # TODO(tomas): Looks like spark does not reuse python workers with python==3.x
-        assert sys.version[0] == "3" or max(results) > 10
+        results = spark.sparkContext.parallelize(range(100), 30).map(get_model).collect()
+        assert max(results) > 10
         # Running again should see no newly-loaded models.
-        results2 = spark.sparkContext.parallelize(range(0, 100), 30).map(get_model).collect()
-        assert sys.version[0] == "3" or min(results2) > 0
+        results2 = spark.sparkContext.parallelize(range(100), 30).map(get_model).collect()
+        assert min(results2) > 0
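The test relies on each Spark Python worker caching the model after its first load, so `get_model` returns a per-worker count that grows as the worker is reused. A minimal, Spark-free sketch of that counting idea (all names here are invented for illustration, and worker assignment is randomized rather than driven by Spark's actual scheduling):

```python
# Hypothetical sketch of the reuse-counting idea behind the test: each
# "worker" loads the model once, and every later task on that worker hits
# the cache instead of reloading.
import random
from collections import defaultdict

def run_tasks(num_tasks, num_workers, seed=0):
    """Assign tasks to workers at random; return per-task reuse counts."""
    rng = random.Random(seed)
    served = defaultdict(int)  # worker id -> tasks handled so far
    results = []
    for _ in range(num_tasks):
        worker = rng.randrange(num_workers)
        served[worker] += 1  # cache hit: the model is reused, not reloaded
        results.append(served[worker])
    return results

# 100 tasks on only 2 workers: heavy reuse is guaranteed by pigeonhole.
results = run_tasks(100, 2)
```

With 100 tasks spread over just two workers, the busiest worker serves at least 50 tasks, which is why the test can assert significant reuse (`max(results) > 10`) without expecting an exact split.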
