New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FIX Fixes performance regression in trees #23404
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,5 @@ cdef int simultaneous_sort( | |
floating *dist, | ||
ITYPE_t *idx, | ||
ITYPE_t size, | ||
bint use_introsort=*, | ||
) nogil |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from cython cimport floating | ||
from libc.math cimport log2 | ||
|
||
cdef inline void dual_swap( | ||
floating* darr, | ||
|
@@ -16,10 +17,62 @@ cdef inline void dual_swap( | |
iarr[b] = itmp | ||
|
||
|
||
cdef int simultaneous_sort( | ||
cdef inline void sift_down( | ||
floating* values, | ||
ITYPE_t* samples, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
ITYPE_t start, | ||
ITYPE_t end, | ||
) nogil: | ||
# Restore heap order in values[start:end] by moving the max element to start. | ||
cdef ITYPE_t child, maxind, root | ||
|
||
root = start | ||
while True: | ||
child = root * 2 + 1 | ||
|
||
# find max of root, left child, right child | ||
maxind = root | ||
if child < end and values[maxind] < values[child]: | ||
maxind = child | ||
if child + 1 < end and values[maxind] < values[child + 1]: | ||
maxind = child + 1 | ||
|
||
if maxind == root: | ||
break | ||
else: | ||
dual_swap(values, samples, root, maxind) | ||
root = maxind | ||
|
||
|
||
cdef inline void heapsort( | ||
floating* values, | ||
ITYPE_t* samples, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should be renamed to |
||
ITYPE_t n | ||
) nogil: | ||
cdef: | ||
ITYPE_t start = (n - 2) / 2 | ||
ITYPE_t end = n | ||
|
||
# heapify | ||
while True: | ||
sift_down(values, samples, start, end) | ||
if start == 0: | ||
break | ||
start -= 1 | ||
|
||
# sort by shrinking the heap, putting the max element immediately after it | ||
end = n - 1 | ||
while end > 0: | ||
dual_swap(values, samples, 0, end) | ||
sift_down(values, samples, 0, end) | ||
end = end - 1 | ||
|
||
|
||
cdef inline int simultaneous_sort( | ||
floating* values, | ||
ITYPE_t* indices, | ||
ITYPE_t size, | ||
bint use_introsort=0, | ||
) nogil: | ||
""" | ||
Perform a recursive quicksort on the values array so as to sort its values in ascending order. | ||
|
@@ -31,6 +84,10 @@ cdef int simultaneous_sort( | |
i = np.argsort(dist) | ||
return dist[i], idx[i] | ||
|
||
If use_introsort=1, then the introsort algorithm is used. This sorting algorithm | ||
switches from quicksort to heapsort when the recursion depth reaches a limit | ||
of 2 * log2(size). | ||
|
||
Notes | ||
----- | ||
Arrays are manipulated via a pointer to their first element and their size | ||
|
@@ -41,6 +98,19 @@ cdef int simultaneous_sort( | |
# The best might be using a std::stable_sort and a Comparator which might need | ||
# an Array of Structures (AoS) instead of the Structure of Arrays (SoA) | ||
# currently used. | ||
if use_introsort == 1: | ||
_simultaneous_sort(values, indices, size, 2 * <int>log2(size), 1) | ||
else: | ||
_simultaneous_sort(values, indices, size, -1, 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The return type is `int`. Let's switch to `void`. |
||
|
||
|
||
cdef inline int _simultaneous_sort( | ||
floating* values, | ||
ITYPE_t* indices, | ||
ITYPE_t size, | ||
int max_depth, | ||
bint use_introsort, | ||
) nogil: | ||
cdef: | ||
ITYPE_t pivot_idx, i, store_idx | ||
floating pivot_val | ||
|
@@ -58,6 +128,8 @@ cdef int simultaneous_sort( | |
dual_swap(values, indices, 1, 2) | ||
if values[0] > values[1]: | ||
dual_swap(values, indices, 0, 1) | ||
elif use_introsort and max_depth <= 0: | ||
heapsort(values, indices, size) | ||
else: | ||
# Determine the pivot using the median-of-three rule. | ||
# The smallest of the three is moved to the beginning of the array, | ||
|
@@ -85,9 +157,9 @@ cdef int simultaneous_sort( | |
|
||
# Recursively sort each side of the pivot | ||
if pivot_idx > 1: | ||
simultaneous_sort(values, indices, pivot_idx) | ||
_simultaneous_sort(values, indices, pivot_idx, max_depth - 1, use_introsort) | ||
if pivot_idx + 2 < size: | ||
simultaneous_sort(values + pivot_idx + 1, | ||
indices + pivot_idx + 1, | ||
size - pivot_idx - 1) | ||
_simultaneous_sort(values + pivot_idx + 1, | ||
indices + pivot_idx + 1, | ||
size - pivot_idx - 1, max_depth - 1, use_introsort) | ||
return 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment for the private helper function: the return type should be void. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.