/* @(#)isort.c 1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8 */ #include #ifndef lint static UConst char sccsid[] = "@(#)isort.c 1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8"; #endif /* * CopyPolicy: GNU Lesser General Public License v2.1 applies * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu) * Copyright (C) 2002-2009 by J. Schilling * * sorted vector abstraction for paranoia * */ /* * Old isort got a bit complex. This re-constrains complexity to * give a go at speed through a more alpha-6-like mechanism. */ /* * "Sort" is a bit of a misnomer in this implementation. It's actually * basically a hash table of sample values (with a linked-list collision * resolution), which lets you quickly determine where in a vector a * particular sample value occurs. * * Collisions aren't due to hash collisions, as the table has one bucket * for each possible sample value. Instead, the "collisions" represent * multiple occurrences of a given value. */ #include #include #include #include #include "p_block.h" #include "isort.h" #include "pmalloc.h" EXPORT sort_info *sort_alloc __PR((long size)); EXPORT void sort_unsortall __PR((sort_info * i)); EXPORT void sort_free __PR((sort_info * i)); LOCAL void sort_sort __PR((sort_info * i, long sortlo, long sorthi)); EXPORT void sort_setup __PR((sort_info * i, Int16_t * vector, long *abspos, long size, long sortlo, long sorthi)); EXPORT sort_link *sort_getmatch __PR((sort_info * i, long post, long overlap, int value)); EXPORT sort_link *sort_nextmatch __PR((sort_info * i, sort_link * prev)); /* * sort_alloc() * * Allocates and initializes a new, empty sort_info object, which can be * used to index up to (size) samples from a vector. */ EXPORT sort_info * sort_alloc(size) long size; { sort_info *ret = _pcalloc(1, sizeof (sort_info)); ret->vector = NULL; ret->sortbegin = -1; ret->size = -1; ret->maxsize = size; ret->head = _pcalloc(65536, sizeof (sort_link *)); ret->bucketusage = _pmalloc(65536 * sizeof (long)); ret->revindex = _pcalloc(size, sizeof (sort_link)); ret->lastbucket = 0; return (ret); } /* * sort_unsortall() (internal) * * This function resets the index for further use with a different vector * or range, without the overhead of an unnecessary free/alloc. */ EXPORT void sort_unsortall(i) sort_info *i; { /* * If there were few enough different samples encountered (and hence few * enough buckets used), we can just zero out those buckets. If there * were many (2000 is picked somewhat arbitrarily), it's faster simply to * zero out all buckets with a memset() rather than walking the data * structure and zeroing them out one by one. */ if (i->lastbucket > 2000) { /* a guess */ memset(i->head, 0, 65536 * sizeof (sort_link *)); } else { long b; for (b = 0; b < i->lastbucket; b++) i->head[i->bucketusage[b]] = NULL; } i->lastbucket = 0; i->sortbegin = -1; /* * Curiously, this function preserves the vector association created * by sort_setup(), but it is used only internally by sort_setup, so * preserving this association is unnecessary. */ } /* * sort_free() * * Releases all memory consumed by a sort_info object. */ EXPORT void sort_free(i) sort_info *i; { _pfree(i->revindex); _pfree(i->head); _pfree(i->bucketusage); _pfree(i); } /* * sort_sort() (internal) * * This function builds the index to allow for fast searching for sample * values within a portion (sortlo - sorthi) of the object's associated * vector. It is called internally and only when needed. */ LOCAL void sort_sort(i, sortlo, sorthi) sort_info *i; long sortlo; long sorthi; { long j; /* * We walk backward through the range to index because we insert new * samples at the head of each bucket's list. At the end, they'll be * sorted from first to last occurrence. */ for (j = sorthi - 1; j >= sortlo; j--) { /* * i->vector[j] = the signed 16-bit sample to index. * hv = pointer to the head of the sorted list of occurences * of this sample * l = the node to associate with this sample * * We add 32768 to convert the signed 16-bit integer to an unsigned * range from 0 to 65535. * * Note that l is located within i->revindex at a position * corresponding to the sample's position in the vector. This allows * ipos() to determine the sample position from a returned sort_link. */ sort_link **hv = i->head + i->vector[j] + 32768; sort_link *l = i->revindex + j; /* * If this is the first time we've encountered this sample, add its * bucket to the list of buckets used. This list is used only for * resetting the index quickly. */ if (*hv == NULL) { i->bucketusage[i->lastbucket] = i->vector[j] + 32768; i->lastbucket++; } /* * Point the new node at the old head, then assign the new node as * the new head. */ l->next = *hv; *hv = l; } /* * Mark the index as initialized. */ i->sortbegin = 0; } /* * sort_setup() * * This function initializes a previously allocated sort_info_t. The * sort_info_t is associated with a vector of samples of length * (size), whose position begins at (*abspos) within the CD's stream * of samples. Only the range of samples between (sortlo, sorthi) * will eventually be indexed for fast searching. (sortlo, sorthi) * are absolute sample positions. * * Note: size *must* be <= i->maxsize given to the preceding sort_alloc(), * but no error checking is done here. */ EXPORT void sort_setup(i, vector, abspos, size, sortlo, sorthi) sort_info *i; Int16_t *vector; long *abspos; long size; long sortlo; long sorthi; { /* * Reset the index if it has already been built. */ if (i->sortbegin != -1) sort_unsortall(i); i->vector = vector; i->size = size; i->abspos = abspos; /* * Convert the absolute (sortlo, sorthi) to offsets within the vector. * Note that the index will not be built until sort_getmatch() is called. * Here we're simply hanging on to the range to index until then. */ i->lo = min(size, max(sortlo - *abspos, 0)); i->hi = max(0, min(sorthi - *abspos, size)); } /* * sort_getmatch() * * This function returns a sort_link_t pointer which refers to the * first sample equal to (value) in the vector. It only searches for * hits within (overlap) samples of (post), where (post) is an offset * within the vector. The caller can determine the position of the * matched sample using ipos(sort_info *, sort_link *). * * This function returns NULL if no matches were found. */ EXPORT sort_link * sort_getmatch(i, post, overlap, value) sort_info *i; long post; long overlap; int value; { sort_link *ret; /* * If the vector hasn't been indexed yet, index it now. */ if (i->sortbegin == -1) sort_sort(i, i->lo, i->hi); /* * Now we reuse lo and hi * * We'll only return samples within (overlap) samples of (post). * Clamp the boundaries to search to the boundaries of the array, * convert the signed sample to an unsigned offset, and store the * state so that future calls to sort_nextmatch do the right thing. * * Reusing lo and hi this way is awful. */ post = max(0, min(i->size, post)); i->val = value + 32768; i->lo = max(0, post - overlap); /* absolute position */ i->hi = min(i->size, post + overlap); /* absolute position */ /* * Walk through the linked list of samples with this value, until * we find the first one within the bounds specified. If there * aren't any, return NULL. */ ret = i->head[i->val]; while (ret) { /* * ipos() calculates the offset (in terms of the original vector) * of this hit. */ if (ipos(i, ret) < i->lo) { ret = ret->next; } else { if (ipos(i, ret) >= i->hi) ret = NULL; break; } } /* i->head[i->val]=ret; */ return (ret); } /* * sort_nextmatch() * * This function returns a sort_link_t pointer which refers to the next sample * matching the criteria previously passed to sort_getmatch(). See * sort_getmatch() for details. * * This function returns NULL if no further matches were found. */ EXPORT sort_link * sort_nextmatch(i, prev) sort_info *i; sort_link *prev; { sort_link *ret = prev->next; /* * If there aren't any more hits, or we've passed the boundary requested * of sort_getmatch(), we're done. */ if (!ret || ipos(i, ret) >= i->hi) return (NULL); return (ret); }