diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 87a52595ce6c2..a9a4bef78e0b1 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -33,6 +33,16 @@ Changelog precision=class balance. :pr:`23214` by :user:`Stéphane Collot ` and :user:`Max Baak `. +:mod:`sklearn.tree` +................... + +- |Fix| Fixes performance regression :class:`tree.DecisionTreeClassifier`, + :class:`tree.DecisionTreeRegressor`, + :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestRegressor`, + :class:`ensemble.GradientBoostingClassifier`, and + :class:`ensemble.GradientBoostingRegressor`. :pr:`23404` by `Thomas Fan`_. + :mod:`sklearn.utils` .................... diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index a14d0ce26ee92..b912a3e7aede0 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -342,7 +342,7 @@ cdef class BestSplitter(BaseDenseSplitter): for i in range(start, end): Xf[i] = self.X[samples[i], current.feature] - simultaneous_sort(&Xf[start], &samples[start], end - start) + simultaneous_sort(&Xf[start], &samples[start], end - start, use_introsort=1) if Xf[end - 1] <= Xf[start] + FEATURE_THRESHOLD: features[f_j], features[n_total_constants] = features[n_total_constants], features[f_j] @@ -1049,9 +1049,11 @@ cdef class BestSparseSplitter(BaseSparseSplitter): &is_samples_sorted) # Sort the positive and negative parts of `Xf` - simultaneous_sort(&Xf[start], &samples[start], end_negative - start) + simultaneous_sort(&Xf[start], &samples[start], end_negative - start, + use_introsort=1) if start_positive < end: - simultaneous_sort(&Xf[start_positive], &samples[start_positive], end - start_positive) + simultaneous_sort(&Xf[start_positive], &samples[start_positive], + end - start_positive, use_introsort=1) # Update index_to_samples to take into account the sort for p in range(start, end_negative): diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd index 412d67c479fac..4c5483e7ceda1 100644 --- a/sklearn/utils/_sorting.pxd +++ b/sklearn/utils/_sorting.pxd @@ -6,4 +6,5 @@ cdef int simultaneous_sort( floating *dist, ITYPE_t *idx, ITYPE_t size, + bint use_introsort=*, ) nogil diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx index 367448b5cb91b..2349e7e769aa5 100644 --- a/sklearn/utils/_sorting.pyx +++ b/sklearn/utils/_sorting.pyx @@ -1,4 +1,5 @@ from cython cimport floating +from libc.math cimport log2 cdef inline void dual_swap( floating* darr, @@ -16,10 +17,62 @@ cdef inline void dual_swap( iarr[b] = itmp -cdef int simultaneous_sort( +cdef inline void sift_down( + floating* values, + ITYPE_t* samples, + ITYPE_t start, + ITYPE_t end, +) nogil: + # Restore heap order in values[start:end] by moving the max element to start. + cdef ITYPE_t child, maxind, root + + root = start + while True: + child = root * 2 + 1 + + # find max of root, left child, right child + maxind = root + if child < end and values[maxind] < values[child]: + maxind = child + if child + 1 < end and values[maxind] < values[child + 1]: + maxind = child + 1 + + if maxind == root: + break + else: + dual_swap(values, samples, root, maxind) + root = maxind + + +cdef inline void heapsort( + floating* values, + ITYPE_t* samples, + ITYPE_t n +) nogil: + cdef: + ITYPE_t start = (n - 2) / 2 + ITYPE_t end = n + + # heapify + while True: + sift_down(values, samples, start, end) + if start == 0: + break + start -= 1 + + # sort by shrinking the heap, putting the max element immediately after it + end = n - 1 + while end > 0: + dual_swap(values, samples, 0, end) + sift_down(values, samples, 0, end) + end = end - 1 + + +cdef inline int simultaneous_sort( floating* values, ITYPE_t* indices, ITYPE_t size, + bint use_introsort=0, ) nogil: """ Perform a recursive quicksort on the values array as to sort them ascendingly. @@ -31,6 +84,10 @@ cdef int simultaneous_sort( i = np.argsort(dist) return dist[i], idx[i] + If use_introsort=1, then the introsort algorithm is used. This sorting algorithm + switches from quicksort to heapsort when the recursion depth based on + based on 2 * log2(size). + Notes ----- Arrays are manipulated via a pointer to there first element and their size @@ -41,6 +98,19 @@ cdef int simultaneous_sort( # The best might be using a std::stable_sort and a Comparator which might need # an Array of Structures (AoS) instead of the Structure of Arrays (SoA) # currently used. + if use_introsort == 1: + _simultaneous_sort(values, indices, size, 2 * log2(size), 1) + else: + _simultaneous_sort(values, indices, size, -1, 0) + + +cdef inline int _simultaneous_sort( + floating* values, + ITYPE_t* indices, + ITYPE_t size, + int max_depth, + bint use_introsort, +) nogil: cdef: ITYPE_t pivot_idx, i, store_idx floating pivot_val @@ -58,6 +128,8 @@ cdef int simultaneous_sort( dual_swap(values, indices, 1, 2) if values[0] > values[1]: dual_swap(values, indices, 0, 1) + elif use_introsort and max_depth <= 0: + heapsort(values, indices, size) else: # Determine the pivot using the median-of-three rule. # The smallest of the three is moved to the beginning of the array, @@ -85,9 +157,9 @@ cdef int simultaneous_sort( # Recursively sort each side of the pivot if pivot_idx > 1: - simultaneous_sort(values, indices, pivot_idx) + _simultaneous_sort(values, indices, pivot_idx, max_depth - 1, use_introsort) if pivot_idx + 2 < size: - simultaneous_sort(values + pivot_idx + 1, - indices + pivot_idx + 1, - size - pivot_idx - 1) + _simultaneous_sort(values + pivot_idx + 1, + indices + pivot_idx + 1, + size - pivot_idx - 1, max_depth - 1, use_introsort) return 0