/* * trsort.c for libdivsufsort * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "divsufsort_private.h" /*- Private Functions -*/ static const saint_t lg_table[256]= { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; static INLINE saint_t tr_ilg(saidx_t n) { #if defined(BUILD_DIVSUFSORT64) return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? 56 + lg_table[(n >> 56) & 0xff] : 48 + lg_table[(n >> 48) & 0xff]) : ((n >> 40) ? 40 + lg_table[(n >> 40) & 0xff] : 32 + lg_table[(n >> 32) & 0xff])) : ((n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff])); #else return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]); #endif } /*---------------------------------------------------------------------------*/ /* Simple insertionsort for small size groups. */ static void tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { saidx_t *a, *b; saidx_t t, r; // KAREN for (a = first + 1; a < last; ++a) { // JEZEBEL for (t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { // LILITH do { *(b + 1) = *b; } while ((first <= --b) && (*b < 0)); if (b < first) { break; } } if (r == 0) { *b = ~*b; } *(b + 1) = t; } } /*---------------------------------------------------------------------------*/ static INLINE void tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { saidx_t j, k; saidx_t v; saidx_t c, d, e; crosscheck("fixdown i=%d size=%d", i, size); // WILMOT for (v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { d = ISAd[SA[k = j++]]; if (d < (e = ISAd[SA[j]])) { k = j; d = e; } if (d <= c) { break; } } SA[i] = v; } /* Simple top-down heapsort. */ static void tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { saidx_t i, m; saidx_t t; m = size; if ((size % 2) == 0) { m--; if (ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } } // LISA for (i = m / 2 - 1; 0 <= i; --i) { crosscheck("LISA i=%d", i); tr_fixdown(ISAd, SA, i, m); } if ((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } // MARK for (i = m - 1; 0 < i; --i) { crosscheck("MARK i=%d", i); t = SA[0], SA[0] = SA[i]; tr_fixdown(ISAd, SA, 0, i); SA[i] = t; } } /*---------------------------------------------------------------------------*/ /* Returns the median of three elements. */ static INLINE saidx_t *tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { saidx_t *t; if (ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } if (ISAd[*v2] > ISAd[*v3]) { if (ISAd[*v1] > ISAd[*v3]) { return v1; } else { return v3; } } return v2; } /* Returns the median of five elements. */ static INLINE saidx_t *tr_median5(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { saidx_t *t; if (ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } if (ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } if (ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } if (ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } if (ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } if (ISAd[*v3] > ISAd[*v4]) { return v4; } return v3; } /* Returns the pivot element. */ static INLINE saidx_t *tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { saidx_t *middle; saidx_t t; t = last - first; middle = first + t / 2; if (t <= 512) { if (t <= 32) { return tr_median3(ISAd, first, middle, last - 1); } else { t >>= 2; return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); } } t >>= 3; first = tr_median3(ISAd, first, first + t, first + (t << 1)); middle = tr_median3(ISAd, middle - t, middle, middle + t); last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); return tr_median3(ISAd, first, middle, last); } /*---------------------------------------------------------------------------*/ typedef struct _trbudget_t trbudget_t; struct _trbudget_t { saidx_t chance; saidx_t remain; saidx_t incval; saidx_t count; }; static INLINE void trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) { budget->chance = chance; budget->remain = budget->incval = incval; } static INLINE saint_t trbudget_check(trbudget_t *budget, saidx_t size) { if(size <= budget->remain) { budget->remain -= size; return 1; } if(budget->chance == 0) { budget->count += size; return 0; } budget->remain += budget->incval - size; budget->chance -= 1; return 1; } /*---------------------------------------------------------------------------*/ static INLINE void tr_partition(const saidx_t *ISAd, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t **pa, saidx_t **pb, saidx_t v) { saidx_t *a, *b, *c, *d, *e, *f; saidx_t t, s; saidx_t x = 0; // JOSEPH for (b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } if (((a = b) < last) && (x < v)) { // MARY for (; (++b < last) && ((x = ISAd[*b]) <= v);) { if (x == v) { SWAP(*b, *a); ++a; } } } // JEREMIAH for (c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } if ((b < (d = c)) && (x > v)) { // BEDELIA for (; (b < --c) && ((x = ISAd[*c]) >= v);) { if (x == v) { SWAP(*c, *d); --d; } } } // ALEX for (; b < c;) { SWAP(*b, *c); // SIMON for (; (++b < c) && ((x = ISAd[*b]) <= v);) { if (x == v) { SWAP(*b, *a); ++a; } } // GREGORY for (; (b < --c) && ((x = ISAd[*c]) >= v);) { if (x == v) { SWAP(*c, *d); --d; } } } // end ALEX if (a <= d) { c = b - 1; if ((s = a - first) > (t = b - a)) { s = t; } // GENEVIEVE for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } if ((s = d - c) > (t = last - d - 1)) { s = t; } // MARISSA for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } first += (b - a), last -= (d - c); } *pa = first, *pb = last; } static void tr_copy(saidx_t *ISA, const saidx_t *SA, saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, saidx_t depth) { /* sort suffixes of middle partition by using sorted order of suffixes of left and right partition. */ saidx_t *c, *d, *e; saidx_t s, v; crosscheck("tr_copy first=%d a=%d b=%d last=%d", first - SA, a - SA, b - SA, last - SA); v = b - SA - 1; // JACK for (c = first, d = a - 1; c <= d; ++c) { if ((0 <= (s = *c - depth)) && (ISA[s] == v)) { *++d = s; ISA[s] = d - SA; } } // JILL for (c = last - 1, e = d + 1, d = b; e < d; --c) { if ((0 <= (s = *c - depth)) && (ISA[s] == v)) { *--d = s; ISA[s] = d - SA; } } } static void tr_partialcopy(saidx_t *ISA, const saidx_t *SA, saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, saidx_t depth) { saidx_t *c, *d, *e; saidx_t s, v; saidx_t rank, lastrank, newrank = -1; v = b - SA - 1; lastrank = -1; // JETHRO for (c = first, d = a - 1; c <= d; ++c) { if ((0 <= (s = *c - depth)) && (ISA[s] == v)) { *++d = s; rank = ISA[s + depth]; if (lastrank != rank) { lastrank = rank; newrank = d - SA; } ISA[s] = newrank; } } lastrank = -1; // SCROOGE for (e = d; first <= e; --e) { rank = ISA[*e]; if (lastrank != rank) { lastrank = rank; newrank = e - SA; } if (newrank != rank) { ISA[*e] = newrank; } } lastrank = -1; // DEWEY for (c = last - 1, e = d + 1, d = b; e < d; --c) { if ((0 <= (s = *c - depth)) && (ISA[s] == v)) { *--d = s; rank = ISA[s + depth]; if (lastrank != rank) { lastrank = rank; newrank = d - SA; } ISA[s] = newrank; } } } static void tr_introsort(saidx_t *ISA, const saidx_t *ISAd, saidx_t *SA, saidx_t *first, saidx_t *last, trbudget_t *budget) { #define STACK_SIZE TR_STACKSIZE struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; } stack[STACK_SIZE]; saidx_t *a, *b, *c; saidx_t t; saidx_t v, x = 0; saidx_t incr = ISAd - ISA; saint_t limit, next; saint_t ssize, trlink = -1; { saidx_t n = last - SA; } // PASCAL for (ssize = 0, limit = tr_ilg(last - first);;) { crosscheck("pascal limit=%d first=%d last=%d", limit, first-SA, last-SA); if (limit < 0) { if (limit == -1) { /* tandem repeat partition */ tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); /* update ranks */ if (a < last) { crosscheck("ranks a