//# GenSort.cc: General sort functions //# Copyright (C) 1993,1994,1995,1996,1997,1998,1999,2000 //# Associated Universities, Inc. Washington DC, USA. //# //# This library is free software; you can redistribute it and/or modify it //# under the terms of the GNU Library General Public License as published by //# the Free Software Foundation; either version 2 of the License, or (at your //# option) any later version. //# //# This library is distributed in the hope that it will be useful, but WITHOUT //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public //# License for more details. //# //# You should have received a copy of the GNU Library General Public License //# along with this library; if not, write to the Free Software Foundation, //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. //# //# Correspondence concerning AIPS++ should be addressed as follows: //# Internet email: aips2-request@nrao.edu. //# Postal address: AIPS++ Project Office //# National Radio Astronomy Observatory //# 520 Edgemont Road //# Charlottesville, VA 22903-2475 USA //# //# $Id$ #ifndef CASA_GENSORT_TCC #define CASA_GENSORT_TCC #include #include #include #include #include #include #include #include #ifdef _OPENMP # include #endif namespace casacore { //# NAMESPACE CASACORE - BEGIN // Do a quicksort in ascending order. // All speedups are from Sedgewick; Algorithms in C. template void GenSort::quickSortAsc (T* data, Int nr, Bool multiThread, Int rec_lim) { // QuickSorting small sets makes no sense. // It will be finished with an insertion sort. // The number 32 is determined experimentally. It is not very critical. if (nr <= 32) { return; } // not enough progress, abort into runtime limited heapsort if (rec_lim < 0) { heapSortAsc(data, nr); return; } // Choose a partition element by taking the median of the // first, middle and last element. // Store the partition element at the end. // Do not use Sedgewick\'s advise to store the partition element in // data[nr-2]. This has dramatic results for reversed ordered arrays. Int i = (nr-1)/2; // middle element T* sf = data; // first element T* sl = data+nr-1; // last element if (data[i] < *sf) swap (data[i], *sf); if (*sl < *sf) swap (*sl, *sf); if (data[i] < *sl) swap (data[i], *sl); T par = *sl; // partition element // Now partition until the pointers cross. for (;;) { while (*++sf < par) ; while (*--sl > par) ; if (sf >= sl) break; swap (*sf, *sl); } swap (*sf, data[nr-1]); i = sf-data; if (multiThread) { /* limit threads to what the code can do to not span unnecessary * workers */ #ifdef _OPENMP int nthreads = std::min(2, omp_get_max_threads()); /* TODO parallel for only uses 2 threads of the group, should use tasks * only parallelize when work time ~ barrier spin time (3ms) * otherwise oversubscription kills performance */ #pragma omp parallel for num_threads(nthreads) if (nr > 500000) #endif for (int thr=0; thr<2; ++thr) { if (thr==0) quickSortAsc (data, i, False, rec_lim - 1); // sort left part if (thr==1) quickSortAsc (sf+1, nr-i-1, False, rec_lim - 1); // sort right part } } else { quickSortAsc (data, i, False, rec_lim - 1); // sort left part quickSortAsc (sf+1, nr-i-1, False, rec_lim - 1); // sort right part } } // Find the k-th largest element using a partial quicksort. template T GenSort::kthLargest (T* data, uInt nr, uInt k) { if (k >= nr) { throw (AipsError ("kthLargest(data, nr, k): k must be < nr")); } Int st = 0; Int end = Int(nr) - 1; // Partition until a set of 1 or 2 elements is left. while (end > st+1) { // Choose a partition element by taking the median of the // first, middle and last element. // Store the partition element at the end. // Do not use Sedgewick\'s advise to store the partition element in // data[nr-2]. This has dramatic results for reversed ordered arrays. Int i = (st+end)/2; // middle element T* sf = data+st; // first element T* sl = data+end; // last element if (data[i] < *sf) swap (data[i], *sf); if (*sl < *sf) swap (*sl, *sf); if (data[i] < *sl) swap (data[i], *sl); T par = *sl; // partition element // Now partition until the pointers cross. for (;;) { while (*++sf < par) ; while (*--sl > par) ; if (sf >= sl) break; swap (*sf, *sl); } swap (*sf, data[end]); // Determine index of partitioning and update the start and end // to take left or right part. i = sf-data; if (i <= Int(k)) st = i; if (i >= Int(k)) end = i; } if (end == st+1) { if (data[st] > data[end]) { swap (data[st], data[end]); } } return data[k]; } // Do an insertion sort in ascending order. template uInt GenSort::insSortAsc (T* data, Int nr, int opt) { if ((opt & Sort::NoDuplicates) == 0) { return insSortAscDup (data, nr); } return insSortAscNoDup (data, nr); } // Do an insertion sort in ascending order. // Keep duplicate elements. template uInt GenSort::insSortAscDup (T* data, Int nr) { Int j; T cur; for (Int i=1; i0 && data[j-1] > cur) { data[j] = data[j-1]; j--; } data[j] = cur; } return nr; } // Do an insertion sort in ascending order. // Skip duplicate elements. template uInt GenSort::insSortAscNoDup (T* data, Int nr) { if (nr < 2) { return nr; // nothing to sort } Int j, k; T cur; Int n = 1; for (Int i=1; i0 && data[j-1] > cur) { j--; } if (j <= 0 || !(data[j-1] == cur)) { // no equal key for (k=n-1; k>=j; k--) { data[k+1] = data[k]; // now shift to right } data[j] = cur; // insert in right place n++; } } return n; } // Do a heapsort in ascending order. template void GenSort::heapSortAsc (T* data, Int nr) { // Use the heapsort algorithm described by Jon Bentley in // UNIX Review, August 1992. data--; Int j; for (j=nr/2; j>=1; j--) { heapAscSiftDown (j, nr, data); } for (j=nr; j>=2; j--) { swap (data[1], data[j]); heapAscSiftDown (1, j-1, data); } } template void GenSort::heapAscSiftDown (Int low, Int up, T* data) { T sav = data[low]; Int c; Int i; for (i=low; (c=2*i)<=up; i=c) { if (c < up && data[c+1] > data[c]) { c++; } data[i] = data[c]; } data[i] = sav; for ( ; (c=i/2)>= low; i=c) { if (!(data[i] > data[c])) { break; } swap (data[c], data[i]); } } template uInt GenSort::parSort (T* data, uInt nr, Sort::Order ord, int opt, int nthread) { int nthr = nthread; // to avoid compiler warning #ifdef _OPENMP if (nthread > 0) { nthr = nthread; // Do not use more threads than there are values. if (uInt(nthr) > nr) nthr = nr; } else { nthr = omp_get_max_threads(); if (uInt(nthr) > nr) nthr = nr; } if (nthr == 0) nthr = 1; #else nthr = 1; #endif Block index(nr+1); Block tinx(nthr+1); Block np(nthr); // Determine ordered parts in the array. // It is done in parallel, whereafter the parts are combined. int step = nr/nthr; for (int i=0; i data[j]) { index[tinx[i]+nparts] = j; // out of order, thus new part nparts++; } } np[i] = nparts; } // Make index parts consecutive by shifting to the left. // See if last and next part can be combined. uInt nparts = np[0]; for (int i=1; i data[tinx[i]]) { index[nparts++] = index[tinx[i]]; } if (nparts == tinx[i]+1) { nparts += np[i]-1; } else { for (uInt j=1; j indexVector(nr); indgen(indexVector); INX* inx = indexVector.data(); INX st = 0; INX end = INX(nr) - 1; // Partition until a set of 1 or 2 elements is left. while (end > st+1) { // Choose a partition element by taking the median of the // first, middle and last element. // Store the partition element at the end. // Do not use Sedgewick\'s advise to store the partition element in // data[nr-2]. This has dramatic results for reversed ordered arrays. INX i = (st+end)/2; // middle element INX* sf = inx+st; // first element INX* sl = inx+end; // last element if (data[inx[i]] < data[*sf]) swapInx (inx[i], *sf); if (data[*sl] < data[*sf]) swapInx (*sl, *sf); if (data[inx[i]] < data[*sl]) swapInx (inx[i], *sl); T partVal = data[*sl]; // partition element // Now partition until the pointers cross. for (;;) { while (data[*++sf] < partVal) ; while (data[*--sl] > partVal) ; if (sf >= sl) break; swapInx (*sf, *sl); } swapInx (*sf, inx[end]); // Determine index of partitioning and update the start and end // to take left or right part. i = sf-inx; if (i <= INX(k)) st = i; if (i >= INX(k)) end = i; } if (end == st+1) { if (data[inx[st]] > data[inx[end]]) { swapInx (inx[st], inx[end]); } } return inx[k]; } // Do an insertion sort in ascending order. template INX GenSortIndirect::insSortAsc (INX* inx, const T* data, INX nr, int opt) { if ((opt & Sort::NoDuplicates) == 0) { return insSortAscDup (inx, data, nr); }else{ return insSortAscNoDup (inx, data, nr); } } // Do an insertion sort in ascending order. // Keep duplicate elements. template INX GenSortIndirect::insSortAscDup (INX* inx, const T* data, INX nr) { for (INX i=1; i0 && isAscending (data, inx[j-1], cur)) { inx[j] = inx[j-1]; j--; } inx[j] = cur; } return nr; } // Do an insertion sort in ascending order. // Skip duplicate elements. template INX GenSortIndirect::insSortAscNoDup (INX* inx, const T* data, INX nr) { if (nr < 2) { return nr; // nothing to sort } INX n = 1; for (INX i=1; i0 && data[inx[j-1]] > data[cur]) { j--; } if (j <= 0 || !(data[inx[j-1]] == data[cur])) { // no equal key for (Int64 k=n-1; k>=j; k--) { inx[k+1] = inx[k]; // now shift to right } inx[j] = cur; // insert in right place n++; } } return n; } // Do a heapsort in ascending order. template void GenSortIndirect::heapSortAsc (INX* inx, const T* data, INX nr) { // Use the heapsort algorithm described by Jon Bentley in // UNIX Review, August 1992. inx--; INX j; for (j=nr/2; j>=1; j--) { heapAscSiftDown (inx, j, nr, data); } for (j=nr; j>=2; j--) { swapInx (inx[1], inx[j]); heapAscSiftDown (inx, 1, j-1, data); } } template void GenSortIndirect::heapAscSiftDown (INX* inx, INX low, INX up, const T* data) { INX sav = inx[low]; INX c; INX i; for (i=low; (c=2*i)<=up; i=c) { if (c < up && isAscending (data, inx[c+1], inx[c])) { c++; } inx[i] = inx[c]; } inx[i] = sav; for ( ; (c=i/2)>= low; i=c) { if (isAscending (data, inx[c], inx[i])) { break; } swapInx (inx[c], inx[i]); } } } //# NAMESPACE CASACORE - END #endif