1 //Templated spread_sort library
2 
3 //          Copyright Steven J. Ross 2001 - 2009.
4 // Distributed under the Boost Software License, Version 1.0.
5 //    (See accompanying file LICENSE_1_0.txt or copy at
6 //          http://www.boost.org/LICENSE_1_0.txt)
7 
8 //  See http://www.boost.org/ for updates, documentation, and revision history.
9 
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 Cygwin fix provided by:
14 Scott McMurray
15 */
16 
17 #ifndef BOOST_SPREAD_SORT_H
18 #define BOOST_SPREAD_SORT_H
19 #include <algorithm>
20 #include <cstring>
21 #include <vector>
22 #include "webrtc/system_wrappers/source/spreadsortlib/constants.hpp"
23 
24 #ifdef getchar
25 // This file should not use getchar as a template parameter name.
26 #undef getchar
27 #endif
28 
29 namespace boost {
30   namespace detail {
31   	//This only works on unsigned data types
32   	template <typename T>
33   	inline unsigned
rough_log_2_size(const T & input)34   	rough_log_2_size(const T& input)
35   	{
36   		unsigned result = 0;
37   		//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
38   		while((input >> result) && (result < (8*sizeof(T)))) ++result;
39   		return result;
40   	}
41 
42   	//Gets the maximum size which we'll call spread_sort on to control worst-case performance
43   	//Maintains both a minimum size to recurse and a check of distribution size versus count
44   	//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
45   	inline size_t
get_max_count(unsigned log_range,size_t count)46   	get_max_count(unsigned log_range, size_t count)
47   	{
48   		unsigned divisor = rough_log_2_size(count);
49   		//Making sure the divisor is positive
50   		if(divisor > LOG_MEAN_BIN_SIZE)
51   			divisor -= LOG_MEAN_BIN_SIZE;
52   		else
53   			divisor = 1;
54   		unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
55   		//Don't try to bitshift more than the size of an element
56   		if((8*sizeof(size_t)) <= relative_width)
57   			relative_width = (8*sizeof(size_t)) - 1;
58   		return (size_t)1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ?
59   			(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  relative_width);
60   	}
61 
62   	//Find the minimum and maximum using <
63   	template <class RandomAccessIter>
64   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,RandomAccessIter & max,RandomAccessIter & min)65   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
66   	{
67   		min = max = current;
68   		//Start from the second item, as max and min are initialized to the first
69   		while(++current < last) {
70   			if(*max < *current)
71   				max = current;
72   			else if(*current < *min)
73   				min = current;
74   		}
75   	}
76 
77   	//Uses a user-defined comparison operator to find minimum and maximum
78   	template <class RandomAccessIter, class compare>
79   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,RandomAccessIter & max,RandomAccessIter & min,compare comp)80   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
81   	{
82   		min = max = current;
83   		while(++current < last) {
84   			if(comp(*max, *current))
85   				max = current;
86   			else if(comp(*current, *min))
87   				min = current;
88   		}
89   	}
90 
91   	//Gets a non-negative right bit shift to operate as a logarithmic divisor
92   	inline int
get_log_divisor(size_t count,unsigned log_range)93   	get_log_divisor(size_t count, unsigned log_range)
94   	{
95   		int log_divisor;
96   		//If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
97   		if((log_divisor = log_range - rough_log_2_size(count)) <= 0 && log_range < MAX_SPLITS)
98   			log_divisor = 0;
99   		else {
100   			//otherwise divide the data into an optimized number of pieces
101   			log_divisor += LOG_MEAN_BIN_SIZE;
102   			if(log_divisor < 0)
103   				log_divisor = 0;
104   			//Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
105   			if((log_range - log_divisor) > MAX_SPLITS)
106   				log_divisor = log_range - MAX_SPLITS;
107   		}
108   		return log_divisor;
109   	}
110 
111   	template <class RandomAccessIter>
112   	inline RandomAccessIter *
size_bins(std::vector<size_t> & bin_sizes,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,unsigned & cache_end,unsigned bin_count)113   	size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
114   	{
115   		//Assure space for the size of each bin, followed by initializing sizes
116   		if(bin_count > bin_sizes.size())
117   			bin_sizes.resize(bin_count);
118   		for(size_t u = 0; u < bin_count; u++)
119   			bin_sizes[u] = 0;
120   		//Make sure there is space for the bins
121   		cache_end = cache_offset + bin_count;
122   		if(cache_end > bin_cache.size())
123   			bin_cache.resize(cache_end);
124   		return &(bin_cache[cache_offset]);
125   	}
126 
127   	//Implementation for recursive integer sorting
128   	template <class RandomAccessIter, class div_type, class data_type>
129   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)130   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
131   				  , std::vector<size_t> &bin_sizes)
132   	{
133   		//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
134   		//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
135   		RandomAccessIter max, min;
136   		find_extremes(first, last, max, min);
137   		//max and min will be the same (the first item) iff all values are equivalent
138   		if(max == min)
139   			return;
140   		RandomAccessIter * target_bin;
141   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
142   		div_type div_min = *min >> log_divisor;
143   		div_type div_max = *max >> log_divisor;
144   		unsigned bin_count = div_max - div_min + 1;
145   		unsigned cache_end;
146   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
147 
148   		//Calculating the size of each bin; this takes roughly 10% of runtime
149   		for (RandomAccessIter current = first; current != last;)
150   			bin_sizes[(*(current++) >> log_divisor) - div_min]++;
151   		//Assign the bin positions
152   		bins[0] = first;
153   		for(unsigned u = 0; u < bin_count - 1; u++)
154   			bins[u + 1] = bins[u] + bin_sizes[u];
155 
156   		//Swap into place
157   		//This dominates runtime, mostly in the swap and bin lookups
158   		RandomAccessIter nextbinstart = first;
159   		for(unsigned u = 0; u < bin_count - 1; ++u) {
160   			RandomAccessIter * local_bin = bins + u;
161   			nextbinstart += bin_sizes[u];
162   			//Iterating over each element in this bin
163   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
164   				//Swapping elements in current into place until the correct element has been swapped in
165   				for(target_bin = (bins + ((*current >> log_divisor) - div_min));  target_bin != local_bin;
166   					target_bin = bins + ((*current >> log_divisor) - div_min)) {
167   					//3-way swap; this is about 1% faster than a 2-way swap with integers
168   					//The main advantage is less copies are involved per item put in the correct place
169   					data_type tmp;
170   					RandomAccessIter b = (*target_bin)++;
171   					RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
172   					if (b_bin != local_bin) {
173   						RandomAccessIter c = (*b_bin)++;
174   						tmp = *c;
175   						*c = *b;
176   					}
177   					else
178   						tmp = *b;
179   					*b = *current;
180   					*current = tmp;
181   				}
182   			}
183   			*local_bin = nextbinstart;
184   		}
185   		bins[bin_count - 1] = last;
186 
187   		//If we've bucketsorted, the array is sorted and we should skip recursion
188   		if(!log_divisor)
189   			return;
190 
191   		//Recursing; log_divisor is the remaining range
192   		size_t max_count = get_max_count(log_divisor, last - first);
193   		RandomAccessIter lastPos = first;
194   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
195   			size_t count = bin_cache[u] - lastPos;
196   			//don't sort unless there are at least two items to compare
197   			if(count < 2)
198   				continue;
199   			//using std::sort if its worst-case is better
200   			if(count < max_count)
201   				std::sort(lastPos, bin_cache[u]);
202   			else
203   				spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
204   		}
205   	}
206 
207   	//Generic bitshift-based 3-way swapping code
208   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inner_swap_loop(RandomAccessIter * bins,const RandomAccessIter & nextbinstart,unsigned ii,right_shift & shift,const unsigned log_divisor,const div_type div_min)209   	inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
210   		, const unsigned log_divisor, const div_type div_min)
211   	{
212   		RandomAccessIter * local_bin = bins + ii;
213   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
214   			for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min));  target_bin != local_bin;
215   				target_bin = bins + (shift(*current, log_divisor) - div_min)) {
216   				data_type tmp;
217   				RandomAccessIter b = (*target_bin)++;
218   				RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
219   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
220   				if (b_bin != local_bin) {
221   					RandomAccessIter c = (*b_bin)++;
222   					tmp = *c;
223   					*c = *b;
224   				}
225   				//Note: we could increment current once the swap is done in this case, but that seems to impair performance
226   				else
227   					tmp = *b;
228   				*b = *current;
229   				*current = tmp;
230   			}
231   		}
232   		*local_bin = nextbinstart;
233   	}
234 
235   	//Standard swapping wrapper for ascending values
236   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
swap_loop(RandomAccessIter * bins,RandomAccessIter & nextbinstart,unsigned ii,right_shift & shift,const std::vector<size_t> & bin_sizes,const unsigned log_divisor,const div_type div_min)237   	inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
238   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
239   	{
240   		nextbinstart += bin_sizes[ii];
241   		inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, log_divisor, div_min);
242   	}
243 
244   	//Functor implementation for recursive sorting
245   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
246   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)247   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
248   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
249   	{
250   		RandomAccessIter max, min;
251   		find_extremes(first, last, max, min, comp);
252   		if(max == min)
253   			return;
254   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
255   		div_type div_min = shift(*min, log_divisor);
256   		div_type div_max = shift(*max, log_divisor);
257   		unsigned bin_count = div_max - div_min + 1;
258   		unsigned cache_end;
259   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
260 
261   		//Calculating the size of each bin
262   		for (RandomAccessIter current = first; current != last;)
263   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
264   		bins[0] = first;
265   		for(unsigned u = 0; u < bin_count - 1; u++)
266   			bins[u + 1] = bins[u] + bin_sizes[u];
267 
268   		//Swap into place
269   		RandomAccessIter nextbinstart = first;
270   		for(unsigned u = 0; u < bin_count - 1; ++u)
271   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min);
272   		bins[bin_count - 1] = last;
273 
274   		//If we've bucketsorted, the array is sorted and we should skip recursion
275   		if(!log_divisor)
276   			return;
277 
278   		//Recursing
279   		size_t max_count = get_max_count(log_divisor, last - first);
280   		RandomAccessIter lastPos = first;
281   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
282   			size_t count = bin_cache[u] - lastPos;
283   			if(count < 2)
284   				continue;
285   			if(count < max_count)
286   				std::sort(lastPos, bin_cache[u], comp);
287   			else
288   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
289   		}
290   	}
291 
292   	//Functor implementation for recursive sorting with only Shift overridden
293   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
294   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)295   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
296   					, std::vector<size_t> &bin_sizes, right_shift shift)
297   	{
298   		RandomAccessIter max, min;
299   		find_extremes(first, last, max, min);
300   		if(max == min)
301   			return;
302   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
303   		div_type div_min = shift(*min, log_divisor);
304   		div_type div_max = shift(*max, log_divisor);
305   		unsigned bin_count = div_max - div_min + 1;
306   		unsigned cache_end;
307   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
308 
309   		//Calculating the size of each bin
310   		for (RandomAccessIter current = first; current != last;)
311   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
312   		bins[0] = first;
313   		for(unsigned u = 0; u < bin_count - 1; u++)
314   			bins[u + 1] = bins[u] + bin_sizes[u];
315 
316   		//Swap into place
317   		RandomAccessIter nextbinstart = first;
318   		for(unsigned ii = 0; ii < bin_count - 1; ++ii)
319   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
320   		bins[bin_count - 1] = last;
321 
322   		//If we've bucketsorted, the array is sorted and we should skip recursion
323   		if(!log_divisor)
324   			return;
325 
326   		//Recursing
327   		size_t max_count = get_max_count(log_divisor, last - first);
328   		RandomAccessIter lastPos = first;
329   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
330   			size_t count = bin_cache[u] - lastPos;
331   			if(count < 2)
332   				continue;
333   			if(count < max_count)
334   				std::sort(lastPos, bin_cache[u]);
335   			else
336   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
337   		}
338   	}
339 
340   	//Holds the bin vector and makes the initial recursive call
341   	template <class RandomAccessIter, class div_type, class data_type>
342   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type)343   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
344   	{
345   		std::vector<size_t> bin_sizes;
346   		std::vector<RandomAccessIter> bin_cache;
347   		spread_sort_rec<RandomAccessIter, div_type, data_type>(first, last, bin_cache, 0, bin_sizes);
348   	}
349 
350   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
351   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift,compare comp)352   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
353   	{
354   		std::vector<size_t> bin_sizes;
355   		std::vector<RandomAccessIter> bin_cache;
356   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(first, last, bin_cache, 0, bin_sizes, shift, comp);
357   	}
358 
359   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
360   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift)361   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
362   	{
363   		std::vector<size_t> bin_sizes;
364   		std::vector<RandomAccessIter> bin_cache;
365   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
366   	}
367   }
368 
369   //Top-level sorting call for integers
370   template <class RandomAccessIter>
integer_sort(RandomAccessIter first,RandomAccessIter last)371   inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
372   {
373   	//Don't sort if it's too small to optimize
374   	if(last - first < detail::MIN_SORT_SIZE)
375   		std::sort(first, last);
376   	else
377   		detail::spread_sort(first, last, *first >> 0, *first);
378   }
379 
380   //integer_sort with functors
381   template <class RandomAccessIter, class right_shift, class compare>
integer_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift,compare comp)382   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
383   						right_shift shift, compare comp) {
384   	if(last - first < detail::MIN_SORT_SIZE)
385   		std::sort(first, last, comp);
386   	else
387   		detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
388   }
389 
390   //integer_sort with right_shift functor
391   template <class RandomAccessIter, class right_shift>
integer_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift)392   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
393   						right_shift shift) {
394   	if(last - first < detail::MIN_SORT_SIZE)
395   		std::sort(first, last);
396   	else
397   		detail::spread_sort(first, last, shift(*first, 0), *first, shift);
398   }
399 
400   //------------------------------------------------------ float_sort source --------------------------------------
401   //Casts a RandomAccessIter to the specified data type
402   template<class cast_type, class RandomAccessIter>
403   inline cast_type
cast_float_iter(const RandomAccessIter & floatiter)404   cast_float_iter(const RandomAccessIter & floatiter)
405   {
406   	cast_type result;
407   	std::memcpy(&result, &(*floatiter), sizeof(cast_type));
408   	return result;
409   }
410 
411   //Casts a data element to the specified datinner_float_a type
412   template<class data_type, class cast_type>
413   inline cast_type
mem_cast(const data_type & data)414   mem_cast(const data_type & data)
415   {
416   	cast_type result;
417   	std::memcpy(&result, &data, sizeof(cast_type));
418   	return result;
419   }
420 
421   namespace detail {
422   	template <class RandomAccessIter, class div_type, class right_shift>
423   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,div_type & max,div_type & min,right_shift shift)424   	find_extremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, right_shift shift)
425   	{
426   		min = max = shift(*current, 0);
427   		while(++current < last) {
428   			div_type value = shift(*current, 0);
429   			if(max < value)
430   				max = value;
431   			else if(value < min)
432   				min = value;
433   		}
434   	}
435 
436   	//Specialized swap loops for floating-point casting
437   	template <class RandomAccessIter, class div_type, class data_type>
inner_float_swap_loop(RandomAccessIter * bins,const RandomAccessIter & nextbinstart,unsigned ii,const unsigned log_divisor,const div_type div_min)438   	inline void inner_float_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii
439   		, const unsigned log_divisor, const div_type div_min)
440   	{
441   		RandomAccessIter * local_bin = bins + ii;
442   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
443   			for(RandomAccessIter * target_bin = (bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min));  target_bin != local_bin;
444   				target_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min)) {
445   				data_type tmp;
446   				RandomAccessIter b = (*target_bin)++;
447   				RandomAccessIter * b_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(b) >> log_divisor) - div_min);
448   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
449   				if (b_bin != local_bin) {
450   					RandomAccessIter c = (*b_bin)++;
451   					tmp = *c;
452   					*c = *b;
453   				}
454   				else
455   					tmp = *b;
456   				*b = *current;
457   				*current = tmp;
458   			}
459   		}
460   		*local_bin = nextbinstart;
461   	}
462 
463   	template <class RandomAccessIter, class div_type, class data_type>
float_swap_loop(RandomAccessIter * bins,RandomAccessIter & nextbinstart,unsigned ii,const std::vector<size_t> & bin_sizes,const unsigned log_divisor,const div_type div_min)464   	inline void float_swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii
465   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
466   	{
467   		nextbinstart += bin_sizes[ii];
468   		inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, log_divisor, div_min);
469   	}
470 
471   	template <class RandomAccessIter, class cast_type>
472   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,cast_type & max,cast_type & min)473   	find_extremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min)
474   	{
475   		min = max = cast_float_iter<cast_type, RandomAccessIter>(current);
476   		while(++current < last) {
477   			cast_type value = cast_float_iter<cast_type, RandomAccessIter>(current);
478   			if(max < value)
479   				max = value;
480   			else if(value < min)
481   				min = value;
482   		}
483   	}
484 
485   	//Special-case sorting of positive floats with casting instead of a right_shift
486   	template <class RandomAccessIter, class div_type, class data_type>
487   	inline void
positive_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)488   	positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
489   					, std::vector<size_t> &bin_sizes)
490   	{
491   		div_type max, min;
492   		find_extremes(first, last, max, min);
493   		if(max == min)
494   			return;
495   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
496   		div_type div_min = min >> log_divisor;
497   		div_type div_max = max >> log_divisor;
498   		unsigned bin_count = div_max - div_min + 1;
499   		unsigned cache_end;
500   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
501 
502   		//Calculating the size of each bin
503   		for (RandomAccessIter current = first; current != last;)
504   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
505   		bins[0] = first;
506   		for(unsigned u = 0; u < bin_count - 1; u++)
507   			bins[u + 1] = bins[u] + bin_sizes[u];
508 
509   		//Swap into place
510   		RandomAccessIter nextbinstart = first;
511   		for(unsigned u = 0; u < bin_count - 1; ++u)
512   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
513   		bins[bin_count - 1] = last;
514 
515   		//Return if we've completed bucketsorting
516   		if(!log_divisor)
517   			return;
518 
519   		//Recursing
520   		size_t max_count = get_max_count(log_divisor, last - first);
521   		RandomAccessIter lastPos = first;
522   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
523   			size_t count = bin_cache[u] - lastPos;
524   			if(count < 2)
525   				continue;
526   			if(count < max_count)
527   				std::sort(lastPos, bin_cache[u]);
528   			else
529   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
530   		}
531   	}
532 
533   	//Sorting negative_ float_s
534   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
535   	template <class RandomAccessIter, class div_type, class data_type>
536   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)537   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
538   					, std::vector<size_t> &bin_sizes)
539   	{
540   		div_type max, min;
541   		find_extremes(first, last, max, min);
542   		if(max == min)
543   			return;
544   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
545   		div_type div_min = min >> log_divisor;
546   		div_type div_max = max >> log_divisor;
547   		unsigned bin_count = div_max - div_min + 1;
548   		unsigned cache_end;
549   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
550 
551   		//Calculating the size of each bin
552   		for (RandomAccessIter current = first; current != last;)
553   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
554   		bins[bin_count - 1] = first;
555   		for(int ii = bin_count - 2; ii >= 0; --ii)
556   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
557 
558   		//Swap into place
559   		RandomAccessIter nextbinstart = first;
560   		//The last bin will always have the correct elements in it
561   		for(int ii = bin_count - 1; ii > 0; --ii)
562   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
563   		//Since we don't process the last bin, we need to update its end position
564   		bin_cache[cache_offset] = last;
565 
566   		//Return if we've completed bucketsorting
567   		if(!log_divisor)
568   			return;
569 
570   		//Recursing
571   		size_t max_count = get_max_count(log_divisor, last - first);
572   		RandomAccessIter lastPos = first;
573   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
574   			size_t count = bin_cache[ii] - lastPos;
575   			if(count < 2)
576   				continue;
577   			if(count < max_count)
578   				std::sort(lastPos, bin_cache[ii]);
579   			else
580   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
581   		}
582   	}
583 
584   	//Sorting negative_ float_s
585   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
586   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
587   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)588   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
589   					, std::vector<size_t> &bin_sizes, right_shift shift)
590   	{
591   		div_type max, min;
592   		find_extremes(first, last, max, min, shift);
593   		if(max == min)
594   			return;
595   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
596   		div_type div_min = min >> log_divisor;
597   		div_type div_max = max >> log_divisor;
598   		unsigned bin_count = div_max - div_min + 1;
599   		unsigned cache_end;
600   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
601 
602   		//Calculating the size of each bin
603   		for (RandomAccessIter current = first; current != last;)
604   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
605   		bins[bin_count - 1] = first;
606   		for(int ii = bin_count - 2; ii >= 0; --ii)
607   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
608 
609   		//Swap into place
610   		RandomAccessIter nextbinstart = first;
611   		//The last bin will always have the correct elements in it
612   		for(int ii = bin_count - 1; ii > 0; --ii)
613   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
614   		//Since we don't process the last bin, we need to update its end position
615   		bin_cache[cache_offset] = last;
616 
617   		//Return if we've completed bucketsorting
618   		if(!log_divisor)
619   			return;
620 
621   		//Recursing
622   		size_t max_count = get_max_count(log_divisor, last - first);
623   		RandomAccessIter lastPos = first;
624   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
625   			size_t count = bin_cache[ii] - lastPos;
626   			if(count < 2)
627   				continue;
628   			if(count < max_count)
629   				std::sort(lastPos, bin_cache[ii]);
630   			else
631   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
632   		}
633   	}
634 
635   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
636   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)637   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
638   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
639   	{
640   		div_type max, min;
641   		find_extremes(first, last, max, min, shift);
642   		if(max == min)
643   			return;
644   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
645   		div_type div_min = min >> log_divisor;
646   		div_type div_max = max >> log_divisor;
647   		unsigned bin_count = div_max - div_min + 1;
648   		unsigned cache_end;
649   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
650 
651   		//Calculating the size of each bin
652   		for (RandomAccessIter current = first; current != last;)
653   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
654   		bins[bin_count - 1] = first;
655   		for(int ii = bin_count - 2; ii >= 0; --ii)
656   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
657 
658   		//Swap into place
659   		RandomAccessIter nextbinstart = first;
660   		//The last bin will always have the correct elements in it
661   		for(int ii = bin_count - 1; ii > 0; --ii)
662   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
663   		//Since we don't process the last bin, we need to update its end position
664   		bin_cache[cache_offset] = last;
665 
666   		//Return if we've completed bucketsorting
667   		if(!log_divisor)
668   			return;
669 
670   		//Recursing
671   		size_t max_count = get_max_count(log_divisor, last - first);
672   		RandomAccessIter lastPos = first;
673   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
674   			size_t count = bin_cache[ii] - lastPos;
675   			if(count < 2)
676   				continue;
677   			if(count < max_count)
678   				std::sort(lastPos, bin_cache[ii], comp);
679   			else
680   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
681   		}
682   	}
683 
684   	//Casting special-case for floating-point sorting
685   	template <class RandomAccessIter, class div_type, class data_type>
686   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)687   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
688   					, std::vector<size_t> &bin_sizes)
689   	{
690   		div_type max, min;
691   		find_extremes(first, last, max, min);
692   		if(max == min)
693   			return;
694   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
695   		div_type div_min = min >> log_divisor;
696   		div_type div_max = max >> log_divisor;
697   		unsigned bin_count = div_max - div_min + 1;
698   		unsigned cache_end;
699   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
700 
701   		//Calculating the size of each bin
702   		for (RandomAccessIter current = first; current != last;)
703   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
704   		//The index of the first positive bin
705   		div_type first_positive = (div_min < 0) ? -div_min : 0;
706   		//Resetting if all bins are negative
707   		if(cache_offset + first_positive > cache_end)
708   			first_positive = cache_end - cache_offset;
709   		//Reversing the order of the negative bins
710   		//Note that because of the negative/positive ordering direction flip
711   		//We can not depend upon bin order and positions matching up
712   		//so bin_sizes must be reused to contain the end of the bin
713   		if(first_positive > 0) {
714   			bins[first_positive - 1] = first;
715   			for(int ii = first_positive - 2; ii >= 0; --ii) {
716   				bins[ii] = first + bin_sizes[ii + 1];
717   				bin_sizes[ii] += bin_sizes[ii + 1];
718   			}
719   			//Handling positives following negatives
720   			if((unsigned)first_positive < bin_count) {
721   				bins[first_positive] = first + bin_sizes[0];
722   				bin_sizes[first_positive] += bin_sizes[0];
723   			}
724   		}
725   		else
726   			bins[0] = first;
727   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
728   			bins[u + 1] = first + bin_sizes[u];
729   			bin_sizes[u + 1] += bin_sizes[u];
730   		}
731 
732   		//Swap into place
733   		RandomAccessIter nextbinstart = first;
734   		for(unsigned u = 0; u < bin_count; ++u) {
735   			nextbinstart = first + bin_sizes[u];
736   			inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, log_divisor, div_min);
737   		}
738 
739   		if(!log_divisor)
740   			return;
741 
742   		//Handling negative values first
743   		size_t max_count = get_max_count(log_divisor, last - first);
744   		RandomAccessIter lastPos = first;
745   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
746   			size_t count = bin_cache[ii] - lastPos;
747   			if(count < 2)
748   				continue;
749   			if(count < max_count)
750   				std::sort(lastPos, bin_cache[ii]);
751   			//sort negative values using reversed-bin spread_sort
752   			else
753   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
754   		}
755 
756   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
757   			size_t count = bin_cache[u] - lastPos;
758   			if(count < 2)
759   				continue;
760   			if(count < max_count)
761   				std::sort(lastPos, bin_cache[u]);
762   			//sort positive values using normal spread_sort
763   			else
764   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
765   		}
766   	}
767 
768   	//Functor implementation for recursive sorting
769   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
770   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)771   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
772   					, std::vector<size_t> &bin_sizes, right_shift shift)
773   	{
774   		div_type max, min;
775   		find_extremes(first, last, max, min, shift);
776   		if(max == min)
777   			return;
778   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
779   		div_type div_min = min >> log_divisor;
780   		div_type div_max = max >> log_divisor;
781   		unsigned bin_count = div_max - div_min + 1;
782   		unsigned cache_end;
783   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
784 
785   		//Calculating the size of each bin
786   		for (RandomAccessIter current = first; current != last;)
787   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
788   		//The index of the first positive bin
789   		div_type first_positive = (div_min < 0) ? -div_min : 0;
790   		//Resetting if all bins are negative
791   		if(cache_offset + first_positive > cache_end)
792   			first_positive = cache_end - cache_offset;
793   		//Reversing the order of the negative bins
794   		//Note that because of the negative/positive ordering direction flip
795   		//We can not depend upon bin order and positions matching up
796   		//so bin_sizes must be reused to contain the end of the bin
797   		if(first_positive > 0) {
798   			bins[first_positive - 1] = first;
799   			for(int ii = first_positive - 2; ii >= 0; --ii) {
800   				bins[ii] = first + bin_sizes[ii + 1];
801   				bin_sizes[ii] += bin_sizes[ii + 1];
802   			}
803   			//Handling positives following negatives
804   			if((unsigned)first_positive < bin_count) {
805   				bins[first_positive] = first + bin_sizes[0];
806   				bin_sizes[first_positive] += bin_sizes[0];
807   			}
808   		}
809   		else
810   			bins[0] = first;
811   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
812   			bins[u + 1] = first + bin_sizes[u];
813   			bin_sizes[u + 1] += bin_sizes[u];
814   		}
815 
816   		//Swap into place
817   		RandomAccessIter nextbinstart = first;
818   		for(unsigned u = 0; u < bin_count; ++u) {
819   			nextbinstart = first + bin_sizes[u];
820   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
821   		}
822 
823   		//Return if we've completed bucketsorting
824   		if(!log_divisor)
825   			return;
826 
827   		//Handling negative values first
828   		size_t max_count = get_max_count(log_divisor, last - first);
829   		RandomAccessIter lastPos = first;
830   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
831   			size_t count = bin_cache[ii] - lastPos;
832   			if(count < 2)
833   				continue;
834   			if(count < max_count)
835   				std::sort(lastPos, bin_cache[ii]);
836   			//sort negative values using reversed-bin spread_sort
837   			else
838   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
839   		}
840 
841   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
842   			size_t count = bin_cache[u] - lastPos;
843   			if(count < 2)
844   				continue;
845   			if(count < max_count)
846   				std::sort(lastPos, bin_cache[u]);
847   			//sort positive values using normal spread_sort
848   			else
849   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
850   		}
851   	}
852 
853   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
854   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)855   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
856   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
857   	{
858   		div_type max, min;
859   		find_extremes(first, last, max, min, shift);
860   		if(max == min)
861   			return;
862   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
863   		div_type div_min = min >> log_divisor;
864   		div_type div_max = max >> log_divisor;
865   		unsigned bin_count = div_max - div_min + 1;
866   		unsigned cache_end;
867   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
868 
869   		//Calculating the size of each bin
870   		for (RandomAccessIter current = first; current != last;)
871   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
872   		//The index of the first positive bin
873   		div_type first_positive = (div_min < 0) ? -div_min : 0;
874   		//Resetting if all bins are negative
875   		if(cache_offset + first_positive > cache_end)
876   			first_positive = cache_end - cache_offset;
877   		//Reversing the order of the negative bins
878   		//Note that because of the negative/positive ordering direction flip
879   		//We can not depend upon bin order and positions matching up
880   		//so bin_sizes must be reused to contain the end of the bin
881   		if(first_positive > 0) {
882   			bins[first_positive - 1] = first;
883   			for(int ii = first_positive - 2; ii >= 0; --ii) {
884   				bins[ii] = first + bin_sizes[ii + 1];
885   				bin_sizes[ii] += bin_sizes[ii + 1];
886   			}
887   			//Handling positives following negatives
888   			if((unsigned)first_positive < bin_count) {
889   				bins[first_positive] = first + bin_sizes[0];
890   				bin_sizes[first_positive] += bin_sizes[0];
891   			}
892   		}
893   		else
894   			bins[0] = first;
895   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
896   			bins[u + 1] = first + bin_sizes[u];
897   			bin_sizes[u + 1] += bin_sizes[u];
898   		}
899 
900   		//Swap into place
901   		RandomAccessIter nextbinstart = first;
902   		for(unsigned u = 0; u < bin_count; ++u) {
903   			nextbinstart = first + bin_sizes[u];
904   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
905   		}
906 
907   		//Return if we've completed bucketsorting
908   		if(!log_divisor)
909   			return;
910 
911   		//Handling negative values first
912   		size_t max_count = get_max_count(log_divisor, last - first);
913   		RandomAccessIter lastPos = first;
914   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
915   			size_t count = bin_cache[ii] - lastPos;
916   			if(count < 2)
917   				continue;
918   			if(count < max_count)
919   				std::sort(lastPos, bin_cache[ii]);
920   			//sort negative values using reversed-bin spread_sort
921   			else
922   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
923   		}
924 
925   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
926   			size_t count = bin_cache[u] - lastPos;
927   			if(count < 2)
928   				continue;
929   			if(count < max_count)
930   				std::sort(lastPos, bin_cache[u]);
931   			//sort positive values using normal spread_sort
932   			else
933   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
934   		}
935   	}
936 
937   	template <class RandomAccessIter, class cast_type, class data_type>
938   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,cast_type,data_type)939   	float_Sort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type)
940   	{
941   		std::vector<size_t> bin_sizes;
942   		std::vector<RandomAccessIter> bin_cache;
943   		float_sort_rec<RandomAccessIter, cast_type, data_type>(first, last, bin_cache, 0, bin_sizes);
944   	}
945 
946   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
947   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift)948   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
949   	{
950   		std::vector<size_t> bin_sizes;
951   		std::vector<RandomAccessIter> bin_cache;
952   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
953   	}
954 
955   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
956   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift,compare comp)957   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
958   	{
959   		std::vector<size_t> bin_sizes;
960   		std::vector<RandomAccessIter> bin_cache;
961   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift, comp);
962   	}
963   }
964 
965   //float_sort with casting
966   //The cast_type must be equal in size to the data type, and must be a signed integer
967   template <class RandomAccessIter, class cast_type>
float_sort_cast(RandomAccessIter first,RandomAccessIter last,cast_type cVal)968   inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal)
969   {
970   	if(last - first < detail::MIN_SORT_SIZE)
971   		std::sort(first, last);
972   	else
973   		detail::float_Sort(first, last, cVal, *first);
974   }
975 
976   //float_sort with casting to an int
977   //Only use this with IEEE floating-point numbers
978   template <class RandomAccessIter>
float_sort_cast_to_int(RandomAccessIter first,RandomAccessIter last)979   inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last)
980   {
981   	int cVal = 0;
982   	float_sort_cast(first, last, cVal);
983   }
984 
985   //float_sort with functors
986   template <class RandomAccessIter, class right_shift>
float_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift)987   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift)
988   {
989   	if(last - first < detail::MIN_SORT_SIZE)
990   		std::sort(first, last);
991   	else
992   		detail::float_Sort(first, last, shift(*first, 0), *first, shift);
993   }
994 
995   template <class RandomAccessIter, class right_shift, class compare>
float_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift,compare comp)996   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift, compare comp)
997   {
998   	if(last - first < detail::MIN_SORT_SIZE)
999   		std::sort(first, last, comp);
1000   	else
1001   		detail::float_Sort(first, last, shift(*first, 0), *first, shift, comp);
1002   }
1003 
1004   //------------------------------------------------- string_sort source ---------------------------------------------
1005   namespace detail {
1006   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1007   	template<class RandomAccessIter>
1008   	inline void
update_offset(RandomAccessIter first,RandomAccessIter finish,unsigned & char_offset)1009   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset)
1010   	{
1011   		unsigned nextOffset = char_offset;
1012   		bool done = false;
1013   		while(!done) {
1014   			RandomAccessIter curr = first;
1015   			do {
1016   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1017   				if((*curr).size() > char_offset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) {
1018   					done = true;
1019   					break;
1020   				}
1021   			} while(++curr != finish);
1022   			if(!done)
1023   				++nextOffset;
1024   		}
1025   		char_offset = nextOffset;
1026   	}
1027 
1028   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1029   	template<class RandomAccessIter, class get_char, class get_length>
1030   	inline void
update_offset(RandomAccessIter first,RandomAccessIter finish,unsigned & char_offset,get_char getchar,get_length length)1031   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset, get_char getchar, get_length length)
1032   	{
1033   		unsigned nextOffset = char_offset;
1034   		bool done = false;
1035   		while(!done) {
1036   			RandomAccessIter curr = first;
1037   			do {
1038   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1039   				if(length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
1040   					done = true;
1041   					break;
1042   				}
1043   			} while(++curr != finish);
1044   			if(!done)
1045   				++nextOffset;
1046   		}
1047   		char_offset = nextOffset;
1048   	}
1049 
1050   	//A comparison functor for strings that assumes they are identical up to char_offset
1051   	template<class data_type, class unsignedchar_type>
1052   	struct offset_lessthan {
offset_lessthanboost::detail::offset_lessthan1053   		offset_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_lessthan1054   		inline bool operator()(const data_type &x, const data_type &y) const
1055   		{
1056   			unsigned minSize = std::min(x.size(), y.size());
1057   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1058   				if(static_cast<unsignedchar_type>(x[u]) < static_cast<unsignedchar_type>(y[u]))
1059   					return true;
1060   				else if(static_cast<unsignedchar_type>(y[u]) < static_cast<unsignedchar_type>(x[u]))
1061   					return false;
1062   			}
1063   			return x.size() < y.size();
1064   		}
1065   		unsigned fchar_offset;
1066   	};
1067 
1068   	//A comparison functor for strings that assumes they are identical up to char_offset
1069   	template<class data_type, class unsignedchar_type>
1070   	struct offset_greaterthan {
offset_greaterthanboost::detail::offset_greaterthan1071   		offset_greaterthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_greaterthan1072   		inline bool operator()(const data_type &x, const data_type &y) const
1073   		{
1074   			unsigned minSize = std::min(x.size(), y.size());
1075   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1076   				if(static_cast<unsignedchar_type>(x[u]) > static_cast<unsignedchar_type>(y[u]))
1077   					return true;
1078   				else if(static_cast<unsignedchar_type>(y[u]) > static_cast<unsignedchar_type>(x[u]))
1079   					return false;
1080   			}
1081   			return x.size() > y.size();
1082   		}
1083   		unsigned fchar_offset;
1084   	};
1085 
1086   	//A comparison functor for strings that assumes they are identical up to char_offset
1087   	template<class data_type, class get_char, class get_length>
1088   	struct offset_char_lessthan {
offset_char_lessthanboost::detail::offset_char_lessthan1089   		offset_char_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_char_lessthan1090   		inline bool operator()(const data_type &x, const data_type &y) const
1091   		{
1092   			unsigned minSize = std::min(length(x), length(y));
1093   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1094   				if(getchar(x, u) < getchar(y, u))
1095   					return true;
1096   				else if(getchar(y, u) < getchar(x, u))
1097   					return false;
1098   			}
1099   			return length(x) < length(y);
1100   		}
1101   		unsigned fchar_offset;
1102   		get_char getchar;
1103   		get_length length;
1104   	};
1105 
1106   	//String sorting recursive implementation
1107   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1108   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)1109   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1110   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1111   	{
1112   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1113   		//Iterate to the end of the empties.  If all empty, return
1114   		while((*first).size() <= char_offset) {
1115   			if(++first == last)
1116   				return;
1117   		}
1118   		RandomAccessIter finish = last - 1;
1119   		//Getting the last non-empty
1120   		for(;(*finish).size() <= char_offset; --finish) { }
1121   		++finish;
1122   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1123   		update_offset(first, finish, char_offset);
1124 
1125   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1126   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1127   		const unsigned max_size = bin_count;
1128   		const unsigned membin_count = bin_count + 1;
1129   		unsigned cache_end;
1130   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1131 
1132   		//Calculating the size of each bin; this takes roughly 10% of runtime
1133   		for (RandomAccessIter current = first; current != last; ++current) {
1134   			if((*current).size() <= char_offset) {
1135   				bin_sizes[0]++;
1136   			}
1137   			else
1138   				bin_sizes[static_cast<unsignedchar_type>((*current)[char_offset]) + 1]++;
1139   		}
1140   		//Assign the bin positions
1141   		bin_cache[cache_offset] = first;
1142   		for(unsigned u = 0; u < membin_count - 1; u++)
1143   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1144 
1145   		//Swap into place
1146   		RandomAccessIter nextbinstart = first;
1147   		//handling empty bins
1148   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1149   		nextbinstart +=	bin_sizes[0];
1150   		RandomAccessIter * target_bin;
1151   		//Iterating over each element in the bin of empties
1152   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1153   			//empties belong in this bin
1154   			while((*current).size() > char_offset) {
1155   				target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);
1156   				iter_swap(current, (*target_bin)++);
1157   			}
1158   		}
1159   		*local_bin = nextbinstart;
1160   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1161   		unsigned last_bin = bin_count - 1;
1162   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1163   		//This dominates runtime, mostly in the swap and bin lookups
1164   		for(unsigned u = 0; u < last_bin; ++u) {
1165   			local_bin = bins + u;
1166   			nextbinstart += bin_sizes[u + 1];
1167   			//Iterating over each element in this bin
1168   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1169   				//Swapping elements in current into place until the correct element has been swapped in
1170   				for(target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1171   					target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]))
1172   					iter_swap(current, (*target_bin)++);
1173   			}
1174   			*local_bin = nextbinstart;
1175   		}
1176   		bins[last_bin] = last;
1177   		//Recursing
1178   		RandomAccessIter lastPos = bin_cache[cache_offset];
1179   		//Skip this loop for empties
1180   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1181   			size_t count = bin_cache[u] - lastPos;
1182   			//don't sort unless there are at least two items to compare
1183   			if(count < 2)
1184   				continue;
1185   			//using std::sort if its worst-case is better
1186   			if(count < max_size)
1187   				std::sort(lastPos, bin_cache[u], offset_lessthan<data_type, unsignedchar_type>(char_offset + 1));
1188   			else
1189   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1190   		}
1191   	}
1192 
1193   	//Sorts strings in reverse order, with empties at the end
1194   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1195   	inline void
reverse_string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)1196   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1197   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1198   	{
1199   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1200   		RandomAccessIter curr = first;
1201   		//Iterate to the end of the empties.  If all empty, return
1202   		while((*curr).size() <= char_offset) {
1203   			if(++curr == last)
1204   				return;
1205   		}
1206   		//Getting the last non-empty
1207   		while((*(--last)).size() <= char_offset) { }
1208   		++last;
1209   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1210   		update_offset(curr, last, char_offset);
1211   		RandomAccessIter * target_bin;
1212 
1213   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1214   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1215   		const unsigned max_size = bin_count;
1216   		const unsigned membin_count = bin_count + 1;
1217   		const unsigned max_bin = bin_count - 1;
1218   		unsigned cache_end;
1219   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1220   		RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
1221 
1222   		//Calculating the size of each bin; this takes roughly 10% of runtime
1223   		for (RandomAccessIter current = first; current != last; ++current) {
1224   			if((*current).size() <= char_offset) {
1225   				bin_sizes[bin_count]++;
1226   			}
1227   			else
1228   				bin_sizes[max_bin - static_cast<unsignedchar_type>((*current)[char_offset])]++;
1229   		}
1230   		//Assign the bin positions
1231   		bin_cache[cache_offset] = first;
1232   		for(unsigned u = 0; u < membin_count - 1; u++)
1233   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1234 
1235   		//Swap into place
1236   		RandomAccessIter nextbinstart = last;
1237   		//handling empty bins
1238   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1239   		RandomAccessIter lastFull = *local_bin;
1240   		//Iterating over each element in the bin of empties
1241   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1242   			//empties belong in this bin
1243   			while((*current).size() > char_offset) {
1244   				target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);
1245   				iter_swap(current, (*target_bin)++);
1246   			}
1247   		}
1248   		*local_bin = nextbinstart;
1249   		nextbinstart = first;
1250   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1251   		unsigned last_bin = max_bin;
1252   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1253   		//This dominates runtime, mostly in the swap and bin lookups
1254   		for(unsigned u = 0; u < last_bin; ++u) {
1255   			local_bin = bins + u;
1256   			nextbinstart += bin_sizes[u];
1257   			//Iterating over each element in this bin
1258   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1259   				//Swapping elements in current into place until the correct element has been swapped in
1260   				for(target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1261   					target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]))
1262   					iter_swap(current, (*target_bin)++);
1263   			}
1264   			*local_bin = nextbinstart;
1265   		}
1266   		bins[last_bin] = lastFull;
1267   		//Recursing
1268   		RandomAccessIter lastPos = first;
1269   		//Skip this loop for empties
1270   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1271   			size_t count = bin_cache[u] - lastPos;
1272   			//don't sort unless there are at least two items to compare
1273   			if(count < 2)
1274   				continue;
1275   			//using std::sort if its worst-case is better
1276   			if(count < max_size)
1277   				std::sort(lastPos, bin_cache[u], offset_greaterthan<data_type, unsignedchar_type>(char_offset + 1));
1278   			else
1279   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1280   		}
1281   	}
1282 
1283   	//String sorting recursive implementation
1284   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length>
1285   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length)1286   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1287   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length)
1288   	{
1289   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1290   		//Iterate to the end of the empties.  If all empty, return
1291   		while(length(*first) <= char_offset) {
1292   			if(++first == last)
1293   				return;
1294   		}
1295   		RandomAccessIter finish = last - 1;
1296   		//Getting the last non-empty
1297   		for(;length(*finish) <= char_offset; --finish) { }
1298   		++finish;
1299   		update_offset(first, finish, char_offset, getchar, length);
1300 
1301   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1302   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1303   		const unsigned max_size = bin_count;
1304   		const unsigned membin_count = bin_count + 1;
1305   		unsigned cache_end;
1306   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1307 
1308   		//Calculating the size of each bin; this takes roughly 10% of runtime
1309   		for (RandomAccessIter current = first; current != last; ++current) {
1310   			if(length(*current) <= char_offset) {
1311   				bin_sizes[0]++;
1312   			}
1313   			else
1314   				bin_sizes[getchar((*current), char_offset) + 1]++;
1315   		}
1316   		//Assign the bin positions
1317   		bin_cache[cache_offset] = first;
1318   		for(unsigned u = 0; u < membin_count - 1; u++)
1319   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1320 
1321   		//Swap into place
1322   		RandomAccessIter nextbinstart = first;
1323   		//handling empty bins
1324   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1325   		nextbinstart +=	bin_sizes[0];
1326   		RandomAccessIter * target_bin;
1327   		//Iterating over each element in the bin of empties
1328   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1329   			//empties belong in this bin
1330   			while(length(*current) > char_offset) {
1331   				target_bin = bins + getchar((*current), char_offset);
1332   				iter_swap(current, (*target_bin)++);
1333   			}
1334   		}
1335   		*local_bin = nextbinstart;
1336   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1337   		unsigned last_bin = bin_count - 1;
1338   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1339   		//This dominates runtime, mostly in the swap and bin lookups
1340   		for(unsigned ii = 0; ii < last_bin; ++ii) {
1341   			local_bin = bins + ii;
1342   			nextbinstart += bin_sizes[ii + 1];
1343   			//Iterating over each element in this bin
1344   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1345   				//Swapping elements in current into place until the correct element has been swapped in
1346   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1347   					target_bin = bins + getchar((*current), char_offset))
1348   					iter_swap(current, (*target_bin)++);
1349   			}
1350   			*local_bin = nextbinstart;
1351   		}
1352   		bins[last_bin] = last;
1353 
1354   		//Recursing
1355   		RandomAccessIter lastPos = bin_cache[cache_offset];
1356   		//Skip this loop for empties
1357   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1358   			size_t count = bin_cache[u] - lastPos;
1359   			//don't sort unless there are at least two items to compare
1360   			if(count < 2)
1361   				continue;
1362   			//using std::sort if its worst-case is better
1363   			if(count < max_size)
1364   				std::sort(lastPos, bin_cache[u], offset_char_lessthan<data_type, get_char, get_length>(char_offset + 1));
1365   			else
1366   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length);
1367   		}
1368   	}
1369 
1370   	//String sorting recursive implementation
1371   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1372   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length,compare comp)1373   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1374   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1375   	{
1376   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1377   		//Iterate to the end of the empties.  If all empty, return
1378   		while(length(*first) <= char_offset) {
1379   			if(++first == last)
1380   				return;
1381   		}
1382   		RandomAccessIter finish = last - 1;
1383   		//Getting the last non-empty
1384   		for(;length(*finish) <= char_offset; --finish) { }
1385   		++finish;
1386   		update_offset(first, finish, char_offset, getchar, length);
1387 
1388   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1389   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1390   		const unsigned max_size = bin_count;
1391   		const unsigned membin_count = bin_count + 1;
1392   		unsigned cache_end;
1393   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1394 
1395   		//Calculating the size of each bin; this takes roughly 10% of runtime
1396   		for (RandomAccessIter current = first; current != last; ++current) {
1397   			if(length(*current) <= char_offset) {
1398   				bin_sizes[0]++;
1399   			}
1400   			else
1401   				bin_sizes[getchar((*current), char_offset) + 1]++;
1402   		}
1403   		//Assign the bin positions
1404   		bin_cache[cache_offset] = first;
1405   		for(unsigned u = 0; u < membin_count - 1; u++)
1406   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1407 
1408   		//Swap into place
1409   		RandomAccessIter nextbinstart = first;
1410   		//handling empty bins
1411   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1412   		nextbinstart +=	bin_sizes[0];
1413   		RandomAccessIter * target_bin;
1414   		//Iterating over each element in the bin of empties
1415   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1416   			//empties belong in this bin
1417   			while(length(*current) > char_offset) {
1418   				target_bin = bins + getchar((*current), char_offset);
1419   				iter_swap(current, (*target_bin)++);
1420   			}
1421   		}
1422   		*local_bin = nextbinstart;
1423   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1424   		unsigned last_bin = bin_count - 1;
1425   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1426   		//This dominates runtime, mostly in the swap and bin lookups
1427   		for(unsigned u = 0; u < last_bin; ++u) {
1428   			local_bin = bins + u;
1429   			nextbinstart += bin_sizes[u + 1];
1430   			//Iterating over each element in this bin
1431   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1432   				//Swapping elements in current into place until the correct element has been swapped in
1433   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1434   					target_bin = bins + getchar((*current), char_offset))
1435   					iter_swap(current, (*target_bin)++);
1436   			}
1437   			*local_bin = nextbinstart;
1438   		}
1439   		bins[last_bin] = last;
1440 
1441   		//Recursing
1442   		RandomAccessIter lastPos = bin_cache[cache_offset];
1443   		//Skip this loop for empties
1444   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1445   			size_t count = bin_cache[u] - lastPos;
1446   			//don't sort unless there are at least two items to compare
1447   			if(count < 2)
1448   				continue;
1449   			//using std::sort if its worst-case is better
1450   			if(count < max_size)
1451   				std::sort(lastPos, bin_cache[u], comp);
1452   			else
1453   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1454   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1455   		}
1456   	}
1457 
1458   	//Sorts strings in reverse order, with empties at the end
1459   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1460   	inline void
reverse_string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length,compare comp)1461   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1462   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1463   	{
1464   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1465   		RandomAccessIter curr = first;
1466   		//Iterate to the end of the empties.  If all empty, return
1467   		while(length(*curr) <= char_offset) {
1468   			if(++curr == last)
1469   				return;
1470   		}
1471   		//Getting the last non-empty
1472   		while(length(*(--last)) <= char_offset) { }
1473   		++last;
1474   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1475   		update_offset(first, last, char_offset, getchar, length);
1476 
1477   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1478   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1479   		const unsigned max_size = bin_count;
1480   		const unsigned membin_count = bin_count + 1;
1481   		const unsigned max_bin = bin_count - 1;
1482   		unsigned cache_end;
1483   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1484   		RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
1485 
1486   		//Calculating the size of each bin; this takes roughly 10% of runtime
1487   		for (RandomAccessIter current = first; current != last; ++current) {
1488   			if(length(*current) <= char_offset) {
1489   				bin_sizes[bin_count]++;
1490   			}
1491   			else
1492   				bin_sizes[max_bin - getchar((*current), char_offset)]++;
1493   		}
1494   		//Assign the bin positions
1495   		bin_cache[cache_offset] = first;
1496   		for(unsigned u = 0; u < membin_count - 1; u++)
1497   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1498 
1499   		//Swap into place
1500   		RandomAccessIter nextbinstart = last;
1501   		//handling empty bins
1502   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1503   		RandomAccessIter lastFull = *local_bin;
1504   		RandomAccessIter * target_bin;
1505   		//Iterating over each element in the bin of empties
1506   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1507   			//empties belong in this bin
1508   			while(length(*current) > char_offset) {
1509   				target_bin = end_bin - getchar((*current), char_offset);
1510   				iter_swap(current, (*target_bin)++);
1511   			}
1512   		}
1513   		*local_bin = nextbinstart;
1514   		nextbinstart = first;
1515   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1516   		unsigned last_bin = max_bin;
1517   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1518   		//This dominates runtime, mostly in the swap and bin lookups
1519   		for(unsigned u = 0; u < last_bin; ++u) {
1520   			local_bin = bins + u;
1521   			nextbinstart += bin_sizes[u];
1522   			//Iterating over each element in this bin
1523   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1524   				//Swapping elements in current into place until the correct element has been swapped in
1525   				for(target_bin = end_bin - getchar((*current), char_offset);  target_bin != local_bin;
1526   					target_bin = end_bin - getchar((*current), char_offset))
1527   					iter_swap(current, (*target_bin)++);
1528   			}
1529   			*local_bin = nextbinstart;
1530   		}
1531   		bins[last_bin] = lastFull;
1532   		//Recursing
1533   		RandomAccessIter lastPos = first;
1534   		//Skip this loop for empties
1535   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1536   			size_t count = bin_cache[u] - lastPos;
1537   			//don't sort unless there are at least two items to compare
1538   			if(count < 2)
1539   				continue;
1540   			//using std::sort if its worst-case is better
1541   			if(count < max_size)
1542   				std::sort(lastPos, bin_cache[u], comp);
1543   			else
1544   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1545   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1546   		}
1547   	}
1548 
1549   	//Holds the bin vector and makes the initial recursive call
1550   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1551   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,data_type,unsignedchar_type)1552   	string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1553   	{
1554   		std::vector<size_t> bin_sizes;
1555   		std::vector<RandomAccessIter> bin_cache;
1556   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1557   	}
1558 
1559   	//Holds the bin vector and makes the initial recursive call
1560   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1561   	inline void
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,data_type,unsignedchar_type)1562   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1563   	{
1564   		std::vector<size_t> bin_sizes;
1565   		std::vector<RandomAccessIter> bin_cache;
1566   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1567   	}
1568 
1569   	//Holds the bin vector and makes the initial recursive call
1570   	template <class RandomAccessIter, class get_char, class get_length, class data_type, class unsignedchar_type>
1571   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,data_type,unsignedchar_type)1572   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, data_type, unsignedchar_type)
1573   	{
1574   		std::vector<size_t> bin_sizes;
1575   		std::vector<RandomAccessIter> bin_cache;
1576   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
1577   	}
1578 
1579   	//Holds the bin vector and makes the initial recursive call
1580   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1581   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp,data_type,unsignedchar_type)1582   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1583   	{
1584   		std::vector<size_t> bin_sizes;
1585   		std::vector<RandomAccessIter> bin_cache;
1586   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1587   	}
1588 
1589   	//Holds the bin vector and makes the initial recursive call
1590   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1591   	inline void
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp,data_type,unsignedchar_type)1592   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1593   	{
1594   		std::vector<size_t> bin_sizes;
1595   		std::vector<RandomAccessIter> bin_cache;
1596   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1597   	}
1598   }
1599 
1600   //Allows character-type overloads
1601   template <class RandomAccessIter, class unsignedchar_type>
string_sort(RandomAccessIter first,RandomAccessIter last,unsignedchar_type unused)1602   inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused)
1603   {
1604   	//Don't sort if it's too small to optimize
1605   	if(last - first < detail::MIN_SORT_SIZE)
1606   		std::sort(first, last);
1607   	else
1608   		detail::string_sort(first, last, *first, unused);
1609   }
1610 
1611   //Top-level sorting call; wraps using default of unsigned char
1612   template <class RandomAccessIter>
string_sort(RandomAccessIter first,RandomAccessIter last)1613   inline void string_sort(RandomAccessIter first, RandomAccessIter last)
1614   {
1615   	unsigned char unused = '\0';
1616   	string_sort(first, last, unused);
1617   }
1618 
1619   //Allows character-type overloads
1620   template <class RandomAccessIter, class compare, class unsignedchar_type>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,compare comp,unsignedchar_type unused)1621   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp, unsignedchar_type unused)
1622   {
1623   	//Don't sort if it's too small to optimize
1624   	if(last - first < detail::MIN_SORT_SIZE)
1625   		std::sort(first, last, comp);
1626   	else
1627   		detail::reverse_string_sort(first, last, *first, unused);
1628   }
1629 
1630   //Top-level sorting call; wraps using default of unsigned char
1631   template <class RandomAccessIter, class compare>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,compare comp)1632   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp)
1633   {
1634   	unsigned char unused = '\0';
1635   	reverse_string_sort(first, last, comp, unused);
1636   }
1637 
1638   template <class RandomAccessIter, class get_char, class get_length>
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length)1639   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length)
1640   {
1641   	//Don't sort if it's too small to optimize
1642   	if(last - first < detail::MIN_SORT_SIZE)
1643   		std::sort(first, last);
1644   	else {
1645   		//skipping past empties at the beginning, which allows us to get the character type
1646   		//.empty() is not used so as not to require a user declaration of it
1647   		while(!length(*first)) {
1648   			if(++first == last)
1649   				return;
1650   		}
1651   		detail::string_sort(first, last, getchar, length, *first, getchar((*first), 0));
1652   	}
1653   }
1654 
1655   template <class RandomAccessIter, class get_char, class get_length, class compare>
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp)1656   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1657   {
1658   	//Don't sort if it's too small to optimize
1659   	if(last - first < detail::MIN_SORT_SIZE)
1660   		std::sort(first, last, comp);
1661   	else {
1662   		//skipping past empties at the beginning, which allows us to get the character type
1663   		//.empty() is not used so as not to require a user declaration of it
1664   		while(!length(*first)) {
1665   			if(++first == last)
1666   				return;
1667   		}
1668   		detail::string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1669   	}
1670   }
1671 
1672   template <class RandomAccessIter, class get_char, class get_length, class compare>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp)1673   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1674   {
1675   	//Don't sort if it's too small to optimize
1676   	if(last - first < detail::MIN_SORT_SIZE)
1677   		std::sort(first, last, comp);
1678   	else {
1679   		//skipping past empties at the beginning, which allows us to get the character type
1680   		//.empty() is not used so as not to require a user declaration of it
1681   		while(!length(*(--last))) {
1682   			//Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order
1683   			if(first == last)
1684   				return;
1685   		}
1686   		//making last just after the end of the non-empty part of the array
1687   		++last;
1688   		detail::reverse_string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1689   	}
1690   }
1691 }
1692 
1693 #endif
1694