feat+fix: preserve test collection order #346

Open · wants to merge 1 commit into base: main
43 changes: 22 additions & 21 deletions codecov_cli/commands/labelanalysis.py
@@ -192,7 +192,7 @@ def label_analysis(
runner.process_labelanalysis_result(request_result)
else:
_dry_run_output(
LabelAnalysisRequestResult(request_result),
request_result,
runner,
dry_run_format,
# It's possible that the task had processing errors and fell back to all tests
@@ -287,16 +287,14 @@ def _potentially_calculate_absent_labels(
global_level_labels_set = set(request_result.get("global_level_labels", []))
final_result = LabelAnalysisRequestResult(
{
"present_report_labels": sorted(
"present_report_labels": list(
present_report_labels_set & requested_labels_set
),
"present_diff_labels": sorted(
"present_diff_labels": list(
present_diff_labels_set & requested_labels_set
),
"absent_labels": sorted(
requested_labels_set - present_report_labels_set
),
"global_level_labels": sorted(
"absent_labels": list(requested_labels_set - present_report_labels_set),
"global_level_labels": list(
global_level_labels_set & requested_labels_set
),
}
@@ -312,6 +310,10 @@ def _potentially_calculate_absent_labels(
)
),
)
# requested_labels is important because it preserves the collection order
# of the testing tool. Running tests out of order might surface errors in
# the tests.
final_result["collected_labels_in_order"] = requested_labels
return final_result


@@ -369,15 +371,15 @@ def _send_labelanalysis_request(payload, url, token_header):


def _dry_run_json_output(
labels_to_run: set,
labels_to_skip: set,
labels_to_run: List[str],
labels_to_skip: List[str],
runner_options: List[str],
fallback_reason: str = None,
) -> None:
output_as_dict = dict(
runner_options=runner_options,
ats_tests_to_run=sorted(labels_to_run),
ats_tests_to_skip=sorted(labels_to_skip),
ats_tests_to_run=labels_to_run,
ats_tests_to_skip=labels_to_skip,
ats_fallback_reason=fallback_reason,
)
# ⚠️ DON'T use logger
@@ -386,21 +388,21 @@ def _dry_run_json_output(


def _dry_run_list_output(
labels_to_run: set,
labels_to_skip: set,
labels_to_run: List[str],
labels_to_skip: List[str],
runner_options: List[str],
fallback_reason: str = None,
) -> None:
if fallback_reason:
logger.warning(f"label-analysis didn't run correctly. Error: {fallback_reason}")

to_run_line = " ".join(
sorted(map(lambda l: f"'{l}'", runner_options))
+ sorted(map(lambda l: f"'{l}'", labels_to_run))
list(map(lambda l: f"'{l}'", runner_options))
+ list(map(lambda l: f"'{l}'", labels_to_run))
)
to_skip_line = " ".join(
sorted(map(lambda l: f"'{l}'", runner_options))
+ sorted(map(lambda l: f"'{l}'", labels_to_skip))
list(map(lambda l: f"'{l}'", runner_options))
+ list(map(lambda l: f"'{l}'", labels_to_skip))
)
# ⚠️ DON'T use logger
# logger goes to stderr, we want it in stdout
@@ -417,10 +419,8 @@ def _dry_run_output(
# failed at some point. So it was not a completely successful task.
fallback_reason: str = None,
):
labels_to_run = set(
result.absent_labels + result.global_level_labels + result.present_diff_labels
)
labels_to_skip = set(result.present_report_labels) - labels_to_run
labels_to_run = result.get_tests_to_run_in_collection_order()
labels_to_skip = result.get_tests_to_skip_in_collection_order()

format_lookup = {
"json": _dry_run_json_output,
@@ -451,6 +451,7 @@ def _fallback_to_collected_labels(
"absent_labels": collected_labels,
"present_diff_labels": [],
"global_level_labels": [],
"collected_labels_in_order": collected_labels,
}
)
if not dry_run:
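The pattern these changes follow: set operations still decide *which* labels land in each bucket, but `sorted()` is dropped so that ordering can later be reimposed from `collected_labels_in_order`. A minimal standalone sketch of that order-preserving filter, with hypothetical labels (this is not code from the PR):

```python
def in_collection_order(collected_labels, wanted):
    """Keep only labels in `wanted`, preserving the collector's ordering."""
    wanted_set = set(wanted)  # O(1) membership checks
    return [label for label in collected_labels if label in wanted_set]

# Suppose the collector (e.g. pytest) reported tests in this order:
collected = ["test_c", "test_a", "test_b"]
# Set math picks the tests; collection order dictates the sequence.
assert in_collection_order(collected, {"test_a", "test_c"}) == ["test_c", "test_a"]
```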
25 changes: 8 additions & 17 deletions codecov_cli/runners/pytest_standard_runner.py
@@ -140,35 +140,26 @@ def process_labelanalysis_result(self, result: LabelAnalysisRequestResult):
f"--cov={self.params.coverage_root}",
"--cov-context=test",
] + self.params.execute_tests_options
all_labels = set(
result.absent_labels
+ result.present_diff_labels
+ result.global_level_labels
)
skipped_tests = set(result.present_report_labels) - all_labels
if skipped_tests:
tests_to_run = result.get_tests_to_run_in_collection_order()
tests_to_skip = result.get_tests_to_skip_in_collection_order()
if tests_to_skip:
logger.info(
"Some tests are being skipped. (run in verbose mode to get list of tests skipped)",
extra=dict(
extra_log_attributes=dict(skipped_tests_count=len(skipped_tests))
extra_log_attributes=dict(skipped_tests_count=len(tests_to_skip))
),
)
logger.debug(
"List of skipped tests",
extra=dict(
extra_log_attributes=dict(skipped_tests=sorted(skipped_tests))
),
extra=dict(extra_log_attributes=dict(skipped_tests=tests_to_skip)),
)

if len(all_labels) == 0:
all_labels = [random.choice(result.present_report_labels)]
if len(tests_to_run) == 0:
tests_to_run = [random.choice(result.present_report_labels)]
logger.info(
"All tests are being skipped. Selected random label to run",
extra=dict(extra_log_attributes=dict(selected_label=all_labels[0])),
extra=dict(extra_log_attributes=dict(selected_label=tests_to_run[0])),
)
tests_to_run = [
label.split("[")[0] if "[" in label else label for label in all_labels
]
command_array = default_options + tests_to_run
logger.info(
"Running tests. (run in verbose mode to get list of tests executed)"
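With the ordered lists in hand, the runner appends them directly to the pytest invocation, so tests execute in the sequence they were collected. A rough sketch of the resulting command construction, with illustrative option and label values (the real ones come from the runner params and the label-analysis result):

```python
default_options = ["--cov=.", "--cov-context=test"]  # illustrative values
tests_to_run = [  # already in collection order
    "tests/test_b.py::test_x",
    "tests/test_a.py::test_y",
]

# Labels are appended after the options without re-sorting, so pytest
# receives them in collection order.
command_array = default_options + tests_to_run
# -> pytest --cov=. --cov-context=test tests/test_b.py::test_x tests/test_a.py::test_y
```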
28 changes: 28 additions & 0 deletions codecov_cli/runners/types.py
@@ -21,6 +21,34 @@ def present_diff_labels(self) -> List[str]:
def global_level_labels(self) -> List[str]:
return self.get("global_level_labels", [])

@property
def collected_labels_in_order(self) -> List[str]:
"""The list of collected labels, in the order returned by the testing tool.
This is a superset of all other lists.
"""
return self.get("collected_labels_in_order", [])

def get_tests_to_run_in_collection_order(self) -> List[str]:
labels_to_run = set(
self.absent_labels + self.global_level_labels + self.present_diff_labels
)
output = []
for test_name in self.collected_labels_in_order:
if test_name in labels_to_run:
output.append(test_name)
return output

def get_tests_to_skip_in_collection_order(self) -> List[str]:
labels_to_run = set(
self.absent_labels + self.global_level_labels + self.present_diff_labels
)
labels_to_skip = set(self.present_report_labels) - labels_to_run
output = []
for test_name in self.collected_labels_in_order:
if test_name in labels_to_skip:
output.append(test_name)
return output


class LabelAnalysisRunnerInterface(object):
params: Dict = None
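A quick usage sketch of the new accessors, with made-up labels (this is not one of the PR's tests):

```python
from codecov_cli.runners.types import LabelAnalysisRequestResult

result = LabelAnalysisRequestResult(
    {
        "present_report_labels": ["test_b", "test_a"],
        "present_diff_labels": ["test_a"],
        "absent_labels": ["test_c"],
        "global_level_labels": [],
        # Order as emitted by the collector, deliberately unsorted
        "collected_labels_in_order": ["test_c", "test_b", "test_a"],
    }
)

# to run = absent + global + diff, replayed in collection order
assert result.get_tests_to_run_in_collection_order() == ["test_c", "test_a"]
# to skip = present_report minus the run set, also in collection order
assert result.get_tests_to_skip_in_collection_order() == ["test_b"]
```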
101 changes: 84 additions & 17 deletions tests/commands/test_invoke_labelanalysis.py
@@ -67,24 +67,87 @@ def test_potentially_calculate_labels_recalculate(self):
"label_1",
"label_2",
"label_3",
"label_5",
"label_old",
"label_older",
],
"absent_labels": [],
"present_diff_labels": ["label_2", "label_3", "label_old"],
"present_diff_labels": ["label_5", "label_3", "label_old"],
"global_level_labels": ["label_1", "label_older"],
}
collected_labels = ["label_1", "label_2", "label_3", "label_4"]
collected_labels = [
"label_2",
"label_3",
"label_6",
"label_1",
"label_4",
"label_5",
]
expected = {
"present_diff_labels": ["label_2", "label_3"],
"present_diff_labels": ["label_3", "label_5"],
"global_level_labels": ["label_1"],
"absent_labels": ["label_4"],
"present_report_labels": ["label_1", "label_2", "label_3"],
"absent_labels": ["label_6", "label_4"],
"present_report_labels": ["label_2", "label_3", "label_1", "label_5"],
}
assert (
_potentially_calculate_absent_labels(request_result, collected_labels)
== expected
)
result = _potentially_calculate_absent_labels(request_result, collected_labels)
assert result.get_tests_to_run_in_collection_order() == [
"label_3",
"label_6",
"label_1",
"label_4",
"label_5",
]
assert result.get_tests_to_skip_in_collection_order() == ["label_2"]

def test_potentially_calculate_labels_recalculate(self):
request_result = {
"present_report_labels": [
"label_1",
"label_2",
"label_3",
"label_5",
"label_old",
"label_older",
],
"absent_labels": [],
"present_diff_labels": ["label_5", "label_3", "label_old"],
"global_level_labels": ["label_1", "label_older"],
}
collected_labels = [
"label_2",
"label_3",
"label_6",
"label_1",
"label_4",
"label_5",
]
expected = {
"present_diff_labels": ["label_3", "label_5"],
"global_level_labels": ["label_1"],
"absent_labels": ["label_4", "label_6"],
"present_report_labels": ["label_1", "label_2", "label_3", "label_5"],
}
res = _potentially_calculate_absent_labels(request_result, collected_labels)
# It's tricky to assert correctness because the ordering is not guaranteed,
# so we sort the individual lists and compare those.
# Plus we test the ordering that actually matters
assert sorted(res.absent_labels) == expected["absent_labels"]
assert sorted(res.global_level_labels) == expected["global_level_labels"]
assert sorted(res.present_diff_labels) == expected["present_diff_labels"]
assert sorted(res.present_report_labels) == expected["present_report_labels"]
# This is the only list that needs to actually be ordered
assert res.collected_labels_in_order == collected_labels
# And the ordering that matters most
assert res.get_tests_to_run_in_collection_order() == [
"label_3",
"label_6",
"label_1",
"label_4",
"label_5",
]
assert res.get_tests_to_skip_in_collection_order() == [
"label_2",
]

def test_send_label_analysis_bad_payload(self):
payload = {
@@ -274,7 +337,7 @@ def test_invoke_label_analysis(
assert result.exit_code == 0
mock_get_runner.assert_called()
fake_runner.process_labelanalysis_result.assert_called_with(
label_analysis_result
{**label_analysis_result, "collected_labels_in_order": collected_labels}
)
print(result.output)

@@ -341,7 +404,7 @@ def test_invoke_label_analysis_dry_run(
)
assert json.loads(result.stdout) == {
"runner_options": ["--labels"],
"ats_tests_to_run": ["test_absent", "test_global", "test_in_diff"],
"ats_tests_to_run": ["test_absent", "test_in_diff", "test_global"],
"ats_tests_to_skip": ["test_present"],
"ats_fallback_reason": ats_fallback_reason,
}
@@ -401,7 +464,7 @@ def test_invoke_label_analysis_dry_run_pytest_format(
assert result.exit_code == 0
assert (
result.stdout
== "TESTS_TO_RUN='--labels' 'test_absent' 'test_global' 'test_in_diff'\nTESTS_TO_SKIP='--labels' 'test_present'\n"
== "TESTS_TO_RUN='--labels' 'test_absent' 'test_in_diff' 'test_global'\nTESTS_TO_SKIP='--labels' 'test_present'\n"
)

def test_fallback_to_collected_labels(self, mocker):
@@ -413,6 +476,7 @@ def test_fallback_to_collected_labels(self, mocker):
"absent_labels": collected_labels,
"present_diff_labels": [],
"global_level_labels": [],
"collected_labels_in_order": collected_labels,
}
)
_fallback_to_collected_labels(collected_labels, mock_runner)
@@ -457,6 +521,7 @@ def test_fallback_collected_labels_covecov_500_error(
"absent_labels": collected_labels,
"present_diff_labels": [],
"global_level_labels": [],
"collected_labels_in_order": collected_labels,
}
)
print(result.output)
@@ -494,7 +559,7 @@ def test_fallback_collected_labels_covecov_500_error_dry_run(
# Dry run format defaults to json
assert json.loads(result.stdout) == {
"runner_options": ["--labels"],
"ats_tests_to_run": sorted(collected_labels),
"ats_tests_to_run": collected_labels,
"ats_tests_to_skip": [],
"ats_fallback_reason": "codecov_unavailable",
}
@@ -554,6 +619,7 @@ def test_fallback_collected_labels_codecov_error_processing_label_analysis(
"absent_labels": collected_labels,
"present_diff_labels": [],
"global_level_labels": [],
"collected_labels_in_order": collected_labels,
}
)
print(result.output)
@@ -612,7 +678,7 @@ def test_fallback_collected_labels_codecov_error_processing_label_analysis_dry_r
# Dry run format defaults to json
assert json.loads(result.stdout) == {
"runner_options": ["--labels"],
"ats_tests_to_run": sorted(collected_labels),
"ats_tests_to_run": collected_labels,
"ats_tests_to_skip": [],
"ats_fallback_reason": "test_list_processing_failed",
}
@@ -670,6 +736,7 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded(
"absent_labels": collected_labels,
"present_diff_labels": [],
"global_level_labels": [],
"collected_labels_in_order": collected_labels,
}
)

@@ -720,13 +787,13 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded_dry_run(
mock_get_runner.assert_called()
fake_runner.process_labelanalysis_result.assert_not_called()
# Dry run format defaults to json
assert result.exit_code == 0
assert json.loads(result.stdout) == {
"runner_options": ["--labels"],
"ats_tests_to_run": sorted(collected_labels),
"ats_tests_to_run": collected_labels,
"ats_tests_to_skip": [],
"ats_fallback_reason": "max_wait_time_exceeded",
}
assert result.exit_code == 0

def test_first_labelanalysis_request_fails_but_second_works(
self, get_labelanalysis_deps, mocker, use_verbose_option
@@ -778,6 +845,6 @@
assert result.exit_code == 0
mock_get_runner.assert_called()
fake_runner.process_labelanalysis_result.assert_called_with(
label_analysis_result
{**label_analysis_result, "collected_labels_in_order": collected_labels}
)
print(result.output)