39
39
#include <cstdlib>
40
40
#include <utility>
41
41
42
- /* enough for 32 * 1.618 ** 128 elements */
43
- #define TIMSORT_STACK_SIZE 128
42
+ /* enough for 32 * 1.618 ** 128 elements.
43
+ If powersort were used in all cases, a stack of 90 entries would suffice, since 32 * 2 ** 90 >= 32 * 1.618 ** 128. */
44
+ #define RUN_STACK_SIZE 128
44
45
45
46
static npy_intp
46
47
compute_min_run (npy_intp num )
@@ -58,6 +59,7 @@ compute_min_run(npy_intp num)
58
59
/* One entry of the run stack kept by the timsort drivers (run stack[RUN_STACK_SIZE]). */
typedef struct {
59
60
npy_intp s ; /* start index of the run in the array being sorted (an offset, not a pointer) */
60
61
npy_intp l ; /* length of the run, in elements */
62
+ int power ; /* node "level" for powersort merge strategy; assigned by found_new_run_ when the following run is found */
61
63
} run ;
62
64
63
65
/* buffer for argsort. Declared here to avoid multiple declarations. */
@@ -383,60 +385,51 @@ merge_at_(type *arr, const run *stack, const npy_intp at, buffer_<Tag> *buffer)
383
385
return 0 ;
384
386
}
385
387
386
- template < typename Tag , typename type >
388
+ /* See https://github.com/python/cpython/blob/ea23c897cd25702e72a04e06664f6864f07a7c5d/Objects/listsort.txt
389
+ * for a detailed explanation of how the "power" of the boundary between two adjacent runs is computed.
390
+ * CPython calls the total number of elements *n*; it is named *num* here for consistency with the rest of the NumPy implementation.
391
+ */
387
392
static int
388
- try_collapse_ ( type * arr , run * stack , npy_intp * stack_ptr , buffer_ < Tag > * buffer )
393
+ powerloop ( npy_intp s1 , npy_intp n1 , npy_intp n2 , npy_intp num )
389
394
{
390
- int ret ;
391
- npy_intp A , B , C , top ;
392
- top = * stack_ptr ;
393
-
394
- while (1 < top ) {
395
- B = stack [top - 2 ].l ;
396
- C = stack [top - 1 ].l ;
397
-
398
- if ((2 < top && stack [top - 3 ].l <= B + C ) ||
399
- (3 < top && stack [top - 4 ].l <= stack [top - 3 ].l + B )) {
400
- A = stack [top - 3 ].l ;
401
-
402
- if (A <= C ) {
403
- ret = merge_at_ < Tag > (arr , stack , top - 3 , buffer );
404
-
405
- if (NPY_UNLIKELY (ret < 0 )) {
406
- return ret ;
407
- }
408
-
409
- stack [top - 3 ].l += B ;
410
- stack [top - 2 ] = stack [top - 1 ];
411
- -- top ;
412
- }
413
- else {
414
- ret = merge_at_ < Tag > (arr , stack , top - 2 , buffer );
415
-
416
- if (NPY_UNLIKELY (ret < 0 )) {
417
- return ret ;
418
- }
419
-
420
- stack [top - 2 ].l += C ;
421
- -- top ;
422
- }
395
+ int result = 0 ;
396
+ npy_intp a = 2 * s1 + n1 ; /* 2*a */
397
+ npy_intp b = a + n1 + n2 ; /* 2*b */
398
+ for (;;) {
399
+ ++ result ;
400
+ if (a >= num ) { /* both quotient bits are 1 */
401
+ a -= num ;
402
+ b -= num ;
423
403
}
424
- else if (1 < top && B <= C ) {
425
- ret = merge_at_ < Tag > (arr , stack , top - 2 , buffer );
404
+ else if (b >= num ) { /* a/num bit is 0, b/num bit is 1 */
405
+ break ;
406
+ }
407
+ a <<= 1 ;
408
+ b <<= 1 ;
409
+ }
410
+ return result ;
411
+ }
426
412
413
+ template < typename Tag , typename type >
414
+ static int
415
+ found_new_run_ (type * arr , run * stack , npy_intp * stack_ptr , npy_intp n2 ,
416
+ npy_intp num , buffer_ < Tag > * buffer )
417
+ {
418
+ int ret ;
419
+ if (* stack_ptr > 0 ) {
420
+ npy_intp s1 = stack [* stack_ptr - 1 ].s ;
421
+ npy_intp n1 = stack [* stack_ptr - 1 ].l ;
422
+ int power = powerloop (s1 , n1 , n2 , num );
423
+ while (* stack_ptr > 1 && stack [* stack_ptr - 2 ].power > power ) {
424
+ ret = merge_at_ < Tag > (arr , stack , * stack_ptr - 2 , buffer );
427
425
if (NPY_UNLIKELY (ret < 0 )) {
428
426
return ret ;
429
427
}
430
-
431
- stack [top - 2 ].l += C ;
432
- -- top ;
433
- }
434
- else {
435
- break ;
428
+ stack [* stack_ptr - 2 ].l += stack [* stack_ptr - 1 ].l ;
429
+ -- (* stack_ptr );
436
430
}
431
+ stack [* stack_ptr - 1 ].power = power ;
437
432
}
438
-
439
- * stack_ptr = top ;
440
433
return 0 ;
441
434
}
442
435
@@ -491,23 +484,22 @@ timsort_(void *start, npy_intp num)
491
484
int ret ;
492
485
npy_intp l , n , stack_ptr , minrun ;
493
486
buffer_ < Tag > buffer ;
494
- run stack [TIMSORT_STACK_SIZE ];
487
+ run stack [RUN_STACK_SIZE ];
495
488
buffer .pw = NULL ;
496
489
buffer .size = 0 ;
497
490
stack_ptr = 0 ;
498
491
minrun = compute_min_run (num );
499
492
500
493
for (l = 0 ; l < num ;) {
501
494
n = count_run_ < Tag > ((type * )start , l , num , minrun );
495
+ ret = found_new_run_ < Tag > ((type * )start , stack , & stack_ptr , n , num , & buffer );
496
+ if (NPY_UNLIKELY (ret < 0 ))
497
+ goto cleanup ;
498
+
499
+ // Push the new run onto the stack.
502
500
stack [stack_ptr ].s = l ;
503
501
stack [stack_ptr ].l = n ;
504
502
++ stack_ptr ;
505
- ret = try_collapse_ < Tag > ((type * )start , stack , & stack_ptr , & buffer );
506
-
507
- if (NPY_UNLIKELY (ret < 0 )) {
508
- goto cleanup ;
509
- }
510
-
511
503
l += n ;
512
504
}
513
505
@@ -897,7 +889,7 @@ atimsort_(void *v, npy_intp *tosort, npy_intp num)
897
889
int ret ;
898
890
npy_intp l , n , stack_ptr , minrun ;
899
891
buffer_intp buffer ;
900
- run stack [TIMSORT_STACK_SIZE ];
892
+ run stack [RUN_STACK_SIZE ];
901
893
buffer .pw = NULL ;
902
894
buffer .size = 0 ;
903
895
stack_ptr = 0 ;
@@ -1371,7 +1363,7 @@ string_timsort_(void *start, npy_intp num, void *varr)
1371
1363
size_t len = elsize / sizeof (type );
1372
1364
int ret ;
1373
1365
npy_intp l , n , stack_ptr , minrun ;
1374
- run stack [TIMSORT_STACK_SIZE ];
1366
+ run stack [RUN_STACK_SIZE ];
1375
1367
string_buffer_ < Tag > buffer ;
1376
1368
1377
1369
/* Items that have zero size don't make sense to sort */
@@ -1800,7 +1792,7 @@ string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr)
1800
1792
size_t len = elsize / sizeof (type );
1801
1793
int ret ;
1802
1794
npy_intp l , n , stack_ptr , minrun ;
1803
- run stack [TIMSORT_STACK_SIZE ];
1795
+ run stack [RUN_STACK_SIZE ];
1804
1796
buffer_intp buffer ;
1805
1797
1806
1798
/* Items that have zero size don't make sense to sort */
@@ -2253,7 +2245,7 @@ npy_timsort(void *start, npy_intp num, void *varr)
2253
2245
PyArray_CompareFunc * cmp = PyDataType_GetArrFuncs (PyArray_DESCR (arr ))-> compare ;
2254
2246
int ret ;
2255
2247
npy_intp l , n , stack_ptr , minrun ;
2256
- run stack [TIMSORT_STACK_SIZE ];
2248
+ run stack [RUN_STACK_SIZE ];
2257
2249
buffer_char buffer ;
2258
2250
2259
2251
/* Items that have zero size don't make sense to sort */
@@ -2689,7 +2681,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr)
2689
2681
PyArray_CompareFunc * cmp = PyDataType_GetArrFuncs (PyArray_DESCR (arr ))-> compare ;
2690
2682
int ret ;
2691
2683
npy_intp l , n , stack_ptr , minrun ;
2692
- run stack [TIMSORT_STACK_SIZE ];
2684
+ run stack [RUN_STACK_SIZE ];
2693
2685
buffer_intp buffer ;
2694
2686
2695
2687
/* Items that have zero size don't make sense to sort */
0 commit comments