From b382af5d4141c0f3b61ba7a0cc0a724e019353a1 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 17 May 2022 18:14:09 -0500 Subject: [PATCH 1/4] FIX Fixes performance regression in tree for low cardinality data --- sklearn/tree/_splitter.pyx | 8 ++-- sklearn/utils/_sorting.pxd | 1 + sklearn/utils/_sorting.pyx | 85 +++++++++++++++++++++++++++++++++++--- 3 files changed, 86 insertions(+), 8 deletions(-) diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index a14d0ce26ee92..b912a3e7aede0 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -342,7 +342,7 @@ cdef class BestSplitter(BaseDenseSplitter): for i in range(start, end): Xf[i] = self.X[samples[i], current.feature] - simultaneous_sort(&Xf[start], &samples[start], end - start) + simultaneous_sort(&Xf[start], &samples[start], end - start, use_introsort=1) if Xf[end - 1] <= Xf[start] + FEATURE_THRESHOLD: features[f_j], features[n_total_constants] = features[n_total_constants], features[f_j] @@ -1049,9 +1049,11 @@ cdef class BestSparseSplitter(BaseSparseSplitter): &is_samples_sorted) # Sort the positive and negative parts of `Xf` - simultaneous_sort(&Xf[start], &samples[start], end_negative - start) + simultaneous_sort(&Xf[start], &samples[start], end_negative - start, + use_introsort=1) if start_positive < end: - simultaneous_sort(&Xf[start_positive], &samples[start_positive], end - start_positive) + simultaneous_sort(&Xf[start_positive], &samples[start_positive], + end - start_positive, use_introsort=1) # Update index_to_samples to take into account the sort for p in range(start, end_negative): diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd index 412d67c479fac..4c5483e7ceda1 100644 --- a/sklearn/utils/_sorting.pxd +++ b/sklearn/utils/_sorting.pxd @@ -6,4 +6,5 @@ cdef int simultaneous_sort( floating *dist, ITYPE_t *idx, ITYPE_t size, + bint use_introsort=*, ) nogil diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx index 367448b5cb91b..3c5c780031f4c 100644 --- a/sklearn/utils/_sorting.pyx +++ b/sklearn/utils/_sorting.pyx @@ -1,4 +1,5 @@ from cython cimport floating +from libc.math cimport log2 cdef inline void dual_swap( floating* darr, @@ -16,10 +17,62 @@ cdef inline void dual_swap( iarr[b] = itmp -cdef int simultaneous_sort( +cdef inline void sift_down( + floating* values, + ITYPE_t* samples, + ITYPE_t start, + ITYPE_t end, +) nogil: + # Restore heap order in values[start:end] by moving the max element to start. + cdef ITYPE_t child, maxind, root + + root = start + while True: + child = root * 2 + 1 + + # find max of root, left child, right child + maxind = root + if child < end and values[maxind] < values[child]: + maxind = child + if child + 1 < end and values[maxind] < values[child + 1]: + maxind = child + 1 + + if maxind == root: + break + else: + dual_swap(values, samples, root, maxind) + root = maxind + + +cdef inline void heapsort( + floating* values, + ITYPE_t* samples, + ITYPE_t n +) nogil: + cdef: + ITYPE_t start = (n - 2) / 2 + ITYPE_t end = n + + # heapify + while True: + sift_down(values, samples, start, end) + if start == 0: + break + start -= 1 + + # sort by shrinking the heap, putting the max element immediately after it + end = n - 1 + while end > 0: + dual_swap(values, samples, 0, end) + sift_down(values, samples, 0, end) + end = end - 1 + + +cdef inline int simultaneous_sort( floating* values, ITYPE_t* indices, ITYPE_t size, + bint use_introsort=0, ) nogil: """ Perform a recursive quicksort on the values array as to sort them ascendingly. @@ -31,6 +84,10 @@ cdef int simultaneous_sort( i = np.argsort(dist) return dist[i], idx[i] + If use_introsort=1, then the introsort algorithm is used. This sorting algorithm + switches from quicksort to heapsort when the recursion depth based on + based on 2 * log2(size). + Notes ----- Arrays are manipulated via a pointer to there first element and their size @@ -41,6 +98,19 @@ cdef int simultaneous_sort( # The best might be using a std::stable_sort and a Comparator which might need # an Array of Structures (AoS) instead of the Structure of Arrays (SoA) # currently used. + if use_introsort == 1: + _simultaneous_sort(values, indices, size, 2 * log2(size), 1) + else: + _simultaneous_sort(values, indices, size, -1, 0) + + +cdef inline int _simultaneous_sort( + floating* values, + ITYPE_t* indices, + ITYPE_t size, + int max_depth, + bint use_introsort, +) nogil: cdef: ITYPE_t pivot_idx, i, store_idx floating pivot_val @@ -58,7 +128,12 @@ cdef int simultaneous_sort( dual_swap(values, indices, 1, 2) if values[0] > values[1]: dual_swap(values, indices, 0, 1) + elif use_introsort and max_depth <= 0: + # heapsort + heapsort(values, indices, size) else: + if use_introsort: + max_depth -= 1 # Determine the pivot using the median-of-three rule. # The smallest of the three is moved to the beginning of the array, # the middle (the pivot value) is moved to the end, and the largest @@ -85,9 +160,9 @@ cdef int simultaneous_sort( # Recursively sort each side of the pivot if pivot_idx > 1: - simultaneous_sort(values, indices, pivot_idx) + _simultaneous_sort(values, indices, pivot_idx, max_depth, use_introsort) if pivot_idx + 2 < size: - simultaneous_sort(values + pivot_idx + 1, - indices + pivot_idx + 1, - size - pivot_idx - 1) + _simultaneous_sort(values + pivot_idx + 1, + indices + pivot_idx + 1, + size - pivot_idx - 1, max_depth, use_introsort) return 0 From 527bb22fff0bb9e73420d6590441537190b8eb1a Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 17 May 2022 18:15:36 -0500 Subject: [PATCH 2/4] DOC Adds whats new --- doc/whats_new/v1.1.rst | 10 ++++++++++ sklearn/utils/_sorting.pyx | 1 - 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 87a52595ce6c2..fe98ccf2be5a2 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -33,6 +33,16 @@ Changelog precision=class balance. :pr:`23214` by :user:`Stéphane Collot ` and :user:`Max Baak `. +:mod:`sklearn.tree` +................... + +- |Fix| Fixes performance regression :class:`tree.DecisionTreeClassifier`, + :class:`tree.DecisionTreeRegressor`, + :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestRegressor`, + :class:`ensemble.GradientBoostingClassifier`, and + :class:`ensemble.GradientBoostingRegressor`. :pr:`xxxxx` by `Thomas Fan`_. + :mod:`sklearn.utils` .................... diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx index 3c5c780031f4c..9f14988445ff9 100644 --- a/sklearn/utils/_sorting.pyx +++ b/sklearn/utils/_sorting.pyx @@ -129,7 +129,6 @@ cdef inline int _simultaneous_sort( if values[0] > values[1]: dual_swap(values, indices, 0, 1) elif use_introsort and max_depth <= 0: - # heapsort heapsort(values, indices, size) else: if use_introsort: From 0a7e16fff87caa29416015b3659a9c4e32d00d8b Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 17 May 2022 18:50:57 -0500 Subject: [PATCH 3/4] DOC Adds whats new --- doc/whats_new/v1.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index fe98ccf2be5a2..a9a4bef78e0b1 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -41,7 +41,7 @@ Changelog :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, :class:`ensemble.GradientBoostingClassifier`, and - :class:`ensemble.GradientBoostingRegressor`. :pr:`xxxxx` by `Thomas Fan`_. + :class:`ensemble.GradientBoostingRegressor`. :pr:`23404` by `Thomas Fan`_. :mod:`sklearn.utils` .................... From 62ffd5e18d7a5de2f1ea7a4d9dc8a39f67b279c3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 17 May 2022 23:41:05 -0500 Subject: [PATCH 4/4] ENH Simplify logic --- sklearn/utils/_sorting.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx index 9f14988445ff9..2349e7e769aa5 100644 --- a/sklearn/utils/_sorting.pyx +++ b/sklearn/utils/_sorting.pyx @@ -131,8 +131,6 @@ cdef inline int _simultaneous_sort( elif use_introsort and max_depth <= 0: heapsort(values, indices, size) else: - if use_introsort: - max_depth -= 1 # Determine the pivot using the median-of-three rule. # The smallest of the three is moved to the beginning of the array, # the middle (the pivot value) is moved to the end, and the largest @@ -159,9 +157,9 @@ cdef inline int _simultaneous_sort( # Recursively sort each side of the pivot if pivot_idx > 1: - _simultaneous_sort(values, indices, pivot_idx, max_depth, use_introsort) + _simultaneous_sort(values, indices, pivot_idx, max_depth - 1, use_introsort) if pivot_idx + 2 < size: _simultaneous_sort(values + pivot_idx + 1, indices + pivot_idx + 1, - size - pivot_idx - 1, max_depth, use_introsort) + size - pivot_idx - 1, max_depth - 1, use_introsort) return 0