1 //Templated spread_sort library
2 
3 //          Copyright Steven J. Ross 2001 - 2009.
4 // Distributed under the Boost Software License, Version 1.0.
5 //    (See accompanying file LICENSE_1_0.txt or copy at
6 //          http://www.boost.org/LICENSE_1_0.txt)
7 
8 //  See http://www.boost.org/ for updates, documentation, and revision history.
9 
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 Cygwin fix provided by:
14 Scott McMurray
15 */
16 
17 #ifndef BOOST_SPREAD_SORT_H
18 #define BOOST_SPREAD_SORT_H
19 #include <algorithm>
20 #include <vector>
21 #include "constants.hpp"
22 #include <cstring>
23 
24 namespace boost {
25   namespace detail {
26   	//This only works on unsigned data types
27   	template <typename T>
28   	inline unsigned
rough_log_2_size(const T & input)29   	rough_log_2_size(const T& input)
30   	{
31   		unsigned result = 0;
32   		//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
33   		while((input >> result) && (result < (8*sizeof(T)))) ++result;
34   		return result;
35   	}
36 
37   	//Gets the maximum size which we'll call spread_sort on to control worst-case performance
38   	//Maintains both a minimum size to recurse and a check of distribution size versus count
39   	//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
40   	inline size_t
get_max_count(unsigned log_range,size_t count)41   	get_max_count(unsigned log_range, size_t count)
42   	{
43   		unsigned divisor = rough_log_2_size(count);
44   		//Making sure the divisor is positive
45   		if(divisor > LOG_MEAN_BIN_SIZE)
46   			divisor -= LOG_MEAN_BIN_SIZE;
47   		else
48   			divisor = 1;
49   		unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
50   		//Don't try to bitshift more than the size of an element
51   		if((8*sizeof(size_t)) <= relative_width)
52   			relative_width = (8*sizeof(size_t)) - 1;
53   		return (size_t)1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ?
54   			(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  relative_width);
55   	}
56 
57   	//Find the minimum and maximum using <
58   	template <class RandomAccessIter>
59   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,RandomAccessIter & max,RandomAccessIter & min)60   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
61   	{
62   		min = max = current;
63   		//Start from the second item, as max and min are initialized to the first
64   		while(++current < last) {
65   			if(*max < *current)
66   				max = current;
67   			else if(*current < *min)
68   				min = current;
69   		}
70   	}
71 
72   	//Uses a user-defined comparison operator to find minimum and maximum
73   	template <class RandomAccessIter, class compare>
74   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,RandomAccessIter & max,RandomAccessIter & min,compare comp)75   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
76   	{
77   		min = max = current;
78   		while(++current < last) {
79   			if(comp(*max, *current))
80   				max = current;
81   			else if(comp(*current, *min))
82   				min = current;
83   		}
84   	}
85 
86   	//Gets a non-negative right bit shift to operate as a logarithmic divisor
87   	inline int
get_log_divisor(size_t count,unsigned log_range)88   	get_log_divisor(size_t count, unsigned log_range)
89   	{
90   		int log_divisor;
91   		//If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
92   		if((log_divisor = log_range - rough_log_2_size(count)) <= 0 && log_range < MAX_SPLITS)
93   			log_divisor = 0;
94   		else {
95   			//otherwise divide the data into an optimized number of pieces
96   			log_divisor += LOG_MEAN_BIN_SIZE;
97   			if(log_divisor < 0)
98   				log_divisor = 0;
99   			//Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
100   			if((log_range - log_divisor) > MAX_SPLITS)
101   				log_divisor = log_range - MAX_SPLITS;
102   		}
103   		return log_divisor;
104   	}
105 
106   	template <class RandomAccessIter>
107   	inline RandomAccessIter *
size_bins(std::vector<size_t> & bin_sizes,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,unsigned & cache_end,unsigned bin_count)108   	size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
109   	{
110   		//Assure space for the size of each bin, followed by initializing sizes
111   		if(bin_count > bin_sizes.size())
112   			bin_sizes.resize(bin_count);
113   		for(size_t u = 0; u < bin_count; u++)
114   			bin_sizes[u] = 0;
115   		//Make sure there is space for the bins
116   		cache_end = cache_offset + bin_count;
117   		if(cache_end > bin_cache.size())
118   			bin_cache.resize(cache_end);
119   		return &(bin_cache[cache_offset]);
120   	}
121 
122   	//Implementation for recursive integer sorting
123   	template <class RandomAccessIter, class div_type, class data_type>
124   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)125   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
126   				  , std::vector<size_t> &bin_sizes)
127   	{
128   		//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
129   		//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
130   		RandomAccessIter max, min;
131   		find_extremes(first, last, max, min);
132   		//max and min will be the same (the first item) iff all values are equivalent
133   		if(max == min)
134   			return;
135   		RandomAccessIter * target_bin;
136   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
137   		div_type div_min = *min >> log_divisor;
138   		div_type div_max = *max >> log_divisor;
139   		unsigned bin_count = div_max - div_min + 1;
140   		unsigned cache_end;
141   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
142 
143   		//Calculating the size of each bin; this takes roughly 10% of runtime
144   		for (RandomAccessIter current = first; current != last;)
145   			bin_sizes[(*(current++) >> log_divisor) - div_min]++;
146   		//Assign the bin positions
147   		bins[0] = first;
148   		for(unsigned u = 0; u < bin_count - 1; u++)
149   			bins[u + 1] = bins[u] + bin_sizes[u];
150 
151   		//Swap into place
152   		//This dominates runtime, mostly in the swap and bin lookups
153   		RandomAccessIter nextbinstart = first;
154   		for(unsigned u = 0; u < bin_count - 1; ++u) {
155   			RandomAccessIter * local_bin = bins + u;
156   			nextbinstart += bin_sizes[u];
157   			//Iterating over each element in this bin
158   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
159   				//Swapping elements in current into place until the correct element has been swapped in
160   				for(target_bin = (bins + ((*current >> log_divisor) - div_min));  target_bin != local_bin;
161   					target_bin = bins + ((*current >> log_divisor) - div_min)) {
162   					//3-way swap; this is about 1% faster than a 2-way swap with integers
163   					//The main advantage is less copies are involved per item put in the correct place
164   					data_type tmp;
165   					RandomAccessIter b = (*target_bin)++;
166   					RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
167   					if (b_bin != local_bin) {
168   						RandomAccessIter c = (*b_bin)++;
169   						tmp = *c;
170   						*c = *b;
171   					}
172   					else
173   						tmp = *b;
174   					*b = *current;
175   					*current = tmp;
176   				}
177   			}
178   			*local_bin = nextbinstart;
179   		}
180   		bins[bin_count - 1] = last;
181 
182   		//If we've bucketsorted, the array is sorted and we should skip recursion
183   		if(!log_divisor)
184   			return;
185 
186   		//Recursing; log_divisor is the remaining range
187   		size_t max_count = get_max_count(log_divisor, last - first);
188   		RandomAccessIter lastPos = first;
189   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
190   			size_t count = bin_cache[u] - lastPos;
191   			//don't sort unless there are at least two items to compare
192   			if(count < 2)
193   				continue;
194   			//using std::sort if its worst-case is better
195   			if(count < max_count)
196   				std::sort(lastPos, bin_cache[u]);
197   			else
198   				spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
199   		}
200   	}
201 
202   	//Generic bitshift-based 3-way swapping code
203   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
inner_swap_loop(RandomAccessIter * bins,const RandomAccessIter & nextbinstart,unsigned ii,right_shift & shift,const unsigned log_divisor,const div_type div_min)204   	inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
205   		, const unsigned log_divisor, const div_type div_min)
206   	{
207   		RandomAccessIter * local_bin = bins + ii;
208   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
209   			for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min));  target_bin != local_bin;
210   				target_bin = bins + (shift(*current, log_divisor) - div_min)) {
211   				data_type tmp;
212   				RandomAccessIter b = (*target_bin)++;
213   				RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
214   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
215   				if (b_bin != local_bin) {
216   					RandomAccessIter c = (*b_bin)++;
217   					tmp = *c;
218   					*c = *b;
219   				}
220   				//Note: we could increment current once the swap is done in this case, but that seems to impair performance
221   				else
222   					tmp = *b;
223   				*b = *current;
224   				*current = tmp;
225   			}
226   		}
227   		*local_bin = nextbinstart;
228   	}
229 
230   	//Standard swapping wrapper for ascending values
231   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
swap_loop(RandomAccessIter * bins,RandomAccessIter & nextbinstart,unsigned ii,right_shift & shift,const std::vector<size_t> & bin_sizes,const unsigned log_divisor,const div_type div_min)232   	inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
233   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
234   	{
235   		nextbinstart += bin_sizes[ii];
236   		inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, log_divisor, div_min);
237   	}
238 
239   	//Functor implementation for recursive sorting
240   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
241   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)242   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
243   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
244   	{
245   		RandomAccessIter max, min;
246   		find_extremes(first, last, max, min, comp);
247   		if(max == min)
248   			return;
249   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
250   		div_type div_min = shift(*min, log_divisor);
251   		div_type div_max = shift(*max, log_divisor);
252   		unsigned bin_count = div_max - div_min + 1;
253   		unsigned cache_end;
254   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
255 
256   		//Calculating the size of each bin
257   		for (RandomAccessIter current = first; current != last;)
258   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
259   		bins[0] = first;
260   		for(unsigned u = 0; u < bin_count - 1; u++)
261   			bins[u + 1] = bins[u] + bin_sizes[u];
262 
263   		//Swap into place
264   		RandomAccessIter nextbinstart = first;
265   		for(unsigned u = 0; u < bin_count - 1; ++u)
266   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min);
267   		bins[bin_count - 1] = last;
268 
269   		//If we've bucketsorted, the array is sorted and we should skip recursion
270   		if(!log_divisor)
271   			return;
272 
273   		//Recursing
274   		size_t max_count = get_max_count(log_divisor, last - first);
275   		RandomAccessIter lastPos = first;
276   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
277   			size_t count = bin_cache[u] - lastPos;
278   			if(count < 2)
279   				continue;
280   			if(count < max_count)
281   				std::sort(lastPos, bin_cache[u], comp);
282   			else
283   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
284   		}
285   	}
286 
287   	//Functor implementation for recursive sorting with only Shift overridden
288   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
289   	inline void
spread_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)290   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
291   					, std::vector<size_t> &bin_sizes, right_shift shift)
292   	{
293   		RandomAccessIter max, min;
294   		find_extremes(first, last, max, min);
295   		if(max == min)
296   			return;
297   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
298   		div_type div_min = shift(*min, log_divisor);
299   		div_type div_max = shift(*max, log_divisor);
300   		unsigned bin_count = div_max - div_min + 1;
301   		unsigned cache_end;
302   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
303 
304   		//Calculating the size of each bin
305   		for (RandomAccessIter current = first; current != last;)
306   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
307   		bins[0] = first;
308   		for(unsigned u = 0; u < bin_count - 1; u++)
309   			bins[u + 1] = bins[u] + bin_sizes[u];
310 
311   		//Swap into place
312   		RandomAccessIter nextbinstart = first;
313   		for(unsigned ii = 0; ii < bin_count - 1; ++ii)
314   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
315   		bins[bin_count - 1] = last;
316 
317   		//If we've bucketsorted, the array is sorted and we should skip recursion
318   		if(!log_divisor)
319   			return;
320 
321   		//Recursing
322   		size_t max_count = get_max_count(log_divisor, last - first);
323   		RandomAccessIter lastPos = first;
324   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
325   			size_t count = bin_cache[u] - lastPos;
326   			if(count < 2)
327   				continue;
328   			if(count < max_count)
329   				std::sort(lastPos, bin_cache[u]);
330   			else
331   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
332   		}
333   	}
334 
335   	//Holds the bin vector and makes the initial recursive call
336   	template <class RandomAccessIter, class div_type, class data_type>
337   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type)338   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
339   	{
340   		std::vector<size_t> bin_sizes;
341   		std::vector<RandomAccessIter> bin_cache;
342   		spread_sort_rec<RandomAccessIter, div_type, data_type>(first, last, bin_cache, 0, bin_sizes);
343   	}
344 
345   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
346   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift,compare comp)347   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
348   	{
349   		std::vector<size_t> bin_sizes;
350   		std::vector<RandomAccessIter> bin_cache;
351   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(first, last, bin_cache, 0, bin_sizes, shift, comp);
352   	}
353 
354   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
355   	inline void
spread_sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift)356   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
357   	{
358   		std::vector<size_t> bin_sizes;
359   		std::vector<RandomAccessIter> bin_cache;
360   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
361   	}
362   }
363 
364   //Top-level sorting call for integers
365   template <class RandomAccessIter>
integer_sort(RandomAccessIter first,RandomAccessIter last)366   inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
367   {
368   	//Don't sort if it's too small to optimize
369   	if(last - first < detail::MIN_SORT_SIZE)
370   		std::sort(first, last);
371   	else
372   		detail::spread_sort(first, last, *first >> 0, *first);
373   }
374 
375   //integer_sort with functors
376   template <class RandomAccessIter, class right_shift, class compare>
integer_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift,compare comp)377   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
378   						right_shift shift, compare comp) {
379   	if(last - first < detail::MIN_SORT_SIZE)
380   		std::sort(first, last, comp);
381   	else
382   		detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
383   }
384 
385   //integer_sort with right_shift functor
386   template <class RandomAccessIter, class right_shift>
integer_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift)387   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
388   						right_shift shift) {
389   	if(last - first < detail::MIN_SORT_SIZE)
390   		std::sort(first, last);
391   	else
392   		detail::spread_sort(first, last, shift(*first, 0), *first, shift);
393   }
394 
395   //------------------------------------------------------ float_sort source --------------------------------------
396   //Casts a RandomAccessIter to the specified data type
397   template<class cast_type, class RandomAccessIter>
398   inline cast_type
cast_float_iter(const RandomAccessIter & floatiter)399   cast_float_iter(const RandomAccessIter & floatiter)
400   {
401   	cast_type result;
402   	std::memcpy(&result, &(*floatiter), sizeof(cast_type));
403   	return result;
404   }
405 
406   //Casts a data element to the specified datinner_float_a type
407   template<class data_type, class cast_type>
408   inline cast_type
mem_cast(const data_type & data)409   mem_cast(const data_type & data)
410   {
411   	cast_type result;
412   	std::memcpy(&result, &data, sizeof(cast_type));
413   	return result;
414   }
415 
416   namespace detail {
417   	template <class RandomAccessIter, class div_type, class right_shift>
418   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,div_type & max,div_type & min,right_shift shift)419   	find_extremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, right_shift shift)
420   	{
421   		min = max = shift(*current, 0);
422   		while(++current < last) {
423   			div_type value = shift(*current, 0);
424   			if(max < value)
425   				max = value;
426   			else if(value < min)
427   				min = value;
428   		}
429   	}
430 
431   	//Specialized swap loops for floating-point casting
432   	template <class RandomAccessIter, class div_type, class data_type>
inner_float_swap_loop(RandomAccessIter * bins,const RandomAccessIter & nextbinstart,unsigned ii,const unsigned log_divisor,const div_type div_min)433   	inline void inner_float_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii
434   		, const unsigned log_divisor, const div_type div_min)
435   	{
436   		RandomAccessIter * local_bin = bins + ii;
437   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
438   			for(RandomAccessIter * target_bin = (bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min));  target_bin != local_bin;
439   				target_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min)) {
440   				data_type tmp;
441   				RandomAccessIter b = (*target_bin)++;
442   				RandomAccessIter * b_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(b) >> log_divisor) - div_min);
443   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
444   				if (b_bin != local_bin) {
445   					RandomAccessIter c = (*b_bin)++;
446   					tmp = *c;
447   					*c = *b;
448   				}
449   				else
450   					tmp = *b;
451   				*b = *current;
452   				*current = tmp;
453   			}
454   		}
455   		*local_bin = nextbinstart;
456   	}
457 
458   	template <class RandomAccessIter, class div_type, class data_type>
float_swap_loop(RandomAccessIter * bins,RandomAccessIter & nextbinstart,unsigned ii,const std::vector<size_t> & bin_sizes,const unsigned log_divisor,const div_type div_min)459   	inline void float_swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii
460   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
461   	{
462   		nextbinstart += bin_sizes[ii];
463   		inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, log_divisor, div_min);
464   	}
465 
466   	template <class RandomAccessIter, class cast_type>
467   	inline void
find_extremes(RandomAccessIter current,RandomAccessIter last,cast_type & max,cast_type & min)468   	find_extremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min)
469   	{
470   		min = max = cast_float_iter<cast_type, RandomAccessIter>(current);
471   		while(++current < last) {
472   			cast_type value = cast_float_iter<cast_type, RandomAccessIter>(current);
473   			if(max < value)
474   				max = value;
475   			else if(value < min)
476   				min = value;
477   		}
478   	}
479 
480   	//Special-case sorting of positive floats with casting instead of a right_shift
481   	template <class RandomAccessIter, class div_type, class data_type>
482   	inline void
positive_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)483   	positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
484   					, std::vector<size_t> &bin_sizes)
485   	{
486   		div_type max, min;
487   		find_extremes(first, last, max, min);
488   		if(max == min)
489   			return;
490   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
491   		div_type div_min = min >> log_divisor;
492   		div_type div_max = max >> log_divisor;
493   		unsigned bin_count = div_max - div_min + 1;
494   		unsigned cache_end;
495   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
496 
497   		//Calculating the size of each bin
498   		for (RandomAccessIter current = first; current != last;)
499   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
500   		bins[0] = first;
501   		for(unsigned u = 0; u < bin_count - 1; u++)
502   			bins[u + 1] = bins[u] + bin_sizes[u];
503 
504   		//Swap into place
505   		RandomAccessIter nextbinstart = first;
506   		for(unsigned u = 0; u < bin_count - 1; ++u)
507   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
508   		bins[bin_count - 1] = last;
509 
510   		//Return if we've completed bucketsorting
511   		if(!log_divisor)
512   			return;
513 
514   		//Recursing
515   		size_t max_count = get_max_count(log_divisor, last - first);
516   		RandomAccessIter lastPos = first;
517   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
518   			size_t count = bin_cache[u] - lastPos;
519   			if(count < 2)
520   				continue;
521   			if(count < max_count)
522   				std::sort(lastPos, bin_cache[u]);
523   			else
524   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
525   		}
526   	}
527 
528   	//Sorting negative_ float_s
529   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
530   	template <class RandomAccessIter, class div_type, class data_type>
531   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)532   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
533   					, std::vector<size_t> &bin_sizes)
534   	{
535   		div_type max, min;
536   		find_extremes(first, last, max, min);
537   		if(max == min)
538   			return;
539   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
540   		div_type div_min = min >> log_divisor;
541   		div_type div_max = max >> log_divisor;
542   		unsigned bin_count = div_max - div_min + 1;
543   		unsigned cache_end;
544   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
545 
546   		//Calculating the size of each bin
547   		for (RandomAccessIter current = first; current != last;)
548   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
549   		bins[bin_count - 1] = first;
550   		for(int ii = bin_count - 2; ii >= 0; --ii)
551   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
552 
553   		//Swap into place
554   		RandomAccessIter nextbinstart = first;
555   		//The last bin will always have the correct elements in it
556   		for(int ii = bin_count - 1; ii > 0; --ii)
557   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
558   		//Since we don't process the last bin, we need to update its end position
559   		bin_cache[cache_offset] = last;
560 
561   		//Return if we've completed bucketsorting
562   		if(!log_divisor)
563   			return;
564 
565   		//Recursing
566   		size_t max_count = get_max_count(log_divisor, last - first);
567   		RandomAccessIter lastPos = first;
568   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
569   			size_t count = bin_cache[ii] - lastPos;
570   			if(count < 2)
571   				continue;
572   			if(count < max_count)
573   				std::sort(lastPos, bin_cache[ii]);
574   			else
575   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
576   		}
577   	}
578 
579   	//Sorting negative_ float_s
580   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
581   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
582   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)583   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
584   					, std::vector<size_t> &bin_sizes, right_shift shift)
585   	{
586   		div_type max, min;
587   		find_extremes(first, last, max, min, shift);
588   		if(max == min)
589   			return;
590   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
591   		div_type div_min = min >> log_divisor;
592   		div_type div_max = max >> log_divisor;
593   		unsigned bin_count = div_max - div_min + 1;
594   		unsigned cache_end;
595   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
596 
597   		//Calculating the size of each bin
598   		for (RandomAccessIter current = first; current != last;)
599   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
600   		bins[bin_count - 1] = first;
601   		for(int ii = bin_count - 2; ii >= 0; --ii)
602   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
603 
604   		//Swap into place
605   		RandomAccessIter nextbinstart = first;
606   		//The last bin will always have the correct elements in it
607   		for(int ii = bin_count - 1; ii > 0; --ii)
608   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
609   		//Since we don't process the last bin, we need to update its end position
610   		bin_cache[cache_offset] = last;
611 
612   		//Return if we've completed bucketsorting
613   		if(!log_divisor)
614   			return;
615 
616   		//Recursing
617   		size_t max_count = get_max_count(log_divisor, last - first);
618   		RandomAccessIter lastPos = first;
619   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
620   			size_t count = bin_cache[ii] - lastPos;
621   			if(count < 2)
622   				continue;
623   			if(count < max_count)
624   				std::sort(lastPos, bin_cache[ii]);
625   			else
626   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
627   		}
628   	}
629 
630   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
631   	inline void
negative_float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)632   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
633   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
634   	{
635   		div_type max, min;
636   		find_extremes(first, last, max, min, shift);
637   		if(max == min)
638   			return;
639   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
640   		div_type div_min = min >> log_divisor;
641   		div_type div_max = max >> log_divisor;
642   		unsigned bin_count = div_max - div_min + 1;
643   		unsigned cache_end;
644   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
645 
646   		//Calculating the size of each bin
647   		for (RandomAccessIter current = first; current != last;)
648   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
649   		bins[bin_count - 1] = first;
650   		for(int ii = bin_count - 2; ii >= 0; --ii)
651   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
652 
653   		//Swap into place
654   		RandomAccessIter nextbinstart = first;
655   		//The last bin will always have the correct elements in it
656   		for(int ii = bin_count - 1; ii > 0; --ii)
657   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
658   		//Since we don't process the last bin, we need to update its end position
659   		bin_cache[cache_offset] = last;
660 
661   		//Return if we've completed bucketsorting
662   		if(!log_divisor)
663   			return;
664 
665   		//Recursing
666   		size_t max_count = get_max_count(log_divisor, last - first);
667   		RandomAccessIter lastPos = first;
668   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
669   			size_t count = bin_cache[ii] - lastPos;
670   			if(count < 2)
671   				continue;
672   			if(count < max_count)
673   				std::sort(lastPos, bin_cache[ii], comp);
674   			else
675   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
676   		}
677   	}
678 
679   	//Casting special-case for floating-point sorting
680   	template <class RandomAccessIter, class div_type, class data_type>
681   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)682   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
683   					, std::vector<size_t> &bin_sizes)
684   	{
685   		div_type max, min;
686   		find_extremes(first, last, max, min);
687   		if(max == min)
688   			return;
689   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
690   		div_type div_min = min >> log_divisor;
691   		div_type div_max = max >> log_divisor;
692   		unsigned bin_count = div_max - div_min + 1;
693   		unsigned cache_end;
694   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
695 
696   		//Calculating the size of each bin
697   		for (RandomAccessIter current = first; current != last;)
698   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
699   		//The index of the first positive bin
700   		div_type first_positive = (div_min < 0) ? -div_min : 0;
701   		//Resetting if all bins are negative
702   		if(cache_offset + first_positive > cache_end)
703   			first_positive = cache_end - cache_offset;
704   		//Reversing the order of the negative bins
705   		//Note that because of the negative/positive ordering direction flip
706   		//We can not depend upon bin order and positions matching up
707   		//so bin_sizes must be reused to contain the end of the bin
708   		if(first_positive > 0) {
709   			bins[first_positive - 1] = first;
710   			for(int ii = first_positive - 2; ii >= 0; --ii) {
711   				bins[ii] = first + bin_sizes[ii + 1];
712   				bin_sizes[ii] += bin_sizes[ii + 1];
713   			}
714   			//Handling positives following negatives
715   			if((unsigned)first_positive < bin_count) {
716   				bins[first_positive] = first + bin_sizes[0];
717   				bin_sizes[first_positive] += bin_sizes[0];
718   			}
719   		}
720   		else
721   			bins[0] = first;
722   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
723   			bins[u + 1] = first + bin_sizes[u];
724   			bin_sizes[u + 1] += bin_sizes[u];
725   		}
726 
727   		//Swap into place
728   		RandomAccessIter nextbinstart = first;
729   		for(unsigned u = 0; u < bin_count; ++u) {
730   			nextbinstart = first + bin_sizes[u];
731   			inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, log_divisor, div_min);
732   		}
733 
734   		if(!log_divisor)
735   			return;
736 
737   		//Handling negative values first
738   		size_t max_count = get_max_count(log_divisor, last - first);
739   		RandomAccessIter lastPos = first;
740   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
741   			size_t count = bin_cache[ii] - lastPos;
742   			if(count < 2)
743   				continue;
744   			if(count < max_count)
745   				std::sort(lastPos, bin_cache[ii]);
746   			//sort negative values using reversed-bin spread_sort
747   			else
748   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
749   		}
750 
751   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
752   			size_t count = bin_cache[u] - lastPos;
753   			if(count < 2)
754   				continue;
755   			if(count < max_count)
756   				std::sort(lastPos, bin_cache[u]);
757   			//sort positive values using normal spread_sort
758   			else
759   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
760   		}
761   	}
762 
763   	//Functor implementation for recursive sorting
764   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
765   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift)766   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
767   					, std::vector<size_t> &bin_sizes, right_shift shift)
768   	{
769   		div_type max, min;
770   		find_extremes(first, last, max, min, shift);
771   		if(max == min)
772   			return;
773   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
774   		div_type div_min = min >> log_divisor;
775   		div_type div_max = max >> log_divisor;
776   		unsigned bin_count = div_max - div_min + 1;
777   		unsigned cache_end;
778   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
779 
780   		//Calculating the size of each bin
781   		for (RandomAccessIter current = first; current != last;)
782   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
783   		//The index of the first positive bin
784   		div_type first_positive = (div_min < 0) ? -div_min : 0;
785   		//Resetting if all bins are negative
786   		if(cache_offset + first_positive > cache_end)
787   			first_positive = cache_end - cache_offset;
788   		//Reversing the order of the negative bins
789   		//Note that because of the negative/positive ordering direction flip
790   		//We can not depend upon bin order and positions matching up
791   		//so bin_sizes must be reused to contain the end of the bin
792   		if(first_positive > 0) {
793   			bins[first_positive - 1] = first;
794   			for(int ii = first_positive - 2; ii >= 0; --ii) {
795   				bins[ii] = first + bin_sizes[ii + 1];
796   				bin_sizes[ii] += bin_sizes[ii + 1];
797   			}
798   			//Handling positives following negatives
799   			if((unsigned)first_positive < bin_count) {
800   				bins[first_positive] = first + bin_sizes[0];
801   				bin_sizes[first_positive] += bin_sizes[0];
802   			}
803   		}
804   		else
805   			bins[0] = first;
806   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
807   			bins[u + 1] = first + bin_sizes[u];
808   			bin_sizes[u + 1] += bin_sizes[u];
809   		}
810 
811   		//Swap into place
812   		RandomAccessIter nextbinstart = first;
813   		for(unsigned u = 0; u < bin_count; ++u) {
814   			nextbinstart = first + bin_sizes[u];
815   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
816   		}
817 
818   		//Return if we've completed bucketsorting
819   		if(!log_divisor)
820   			return;
821 
822   		//Handling negative values first
823   		size_t max_count = get_max_count(log_divisor, last - first);
824   		RandomAccessIter lastPos = first;
825   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
826   			size_t count = bin_cache[ii] - lastPos;
827   			if(count < 2)
828   				continue;
829   			if(count < max_count)
830   				std::sort(lastPos, bin_cache[ii]);
831   			//sort negative values using reversed-bin spread_sort
832   			else
833   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
834   		}
835 
836   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
837   			size_t count = bin_cache[u] - lastPos;
838   			if(count < 2)
839   				continue;
840   			if(count < max_count)
841   				std::sort(lastPos, bin_cache[u]);
842   			//sort positive values using normal spread_sort
843   			else
844   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
845   		}
846   	}
847 
848   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
849   	inline void
float_sort_rec(RandomAccessIter first,RandomAccessIter last,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,right_shift shift,compare comp)850   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
851   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
852   	{
853   		div_type max, min;
854   		find_extremes(first, last, max, min, shift);
855   		if(max == min)
856   			return;
857   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
858   		div_type div_min = min >> log_divisor;
859   		div_type div_max = max >> log_divisor;
860   		unsigned bin_count = div_max - div_min + 1;
861   		unsigned cache_end;
862   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
863 
864   		//Calculating the size of each bin
865   		for (RandomAccessIter current = first; current != last;)
866   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
867   		//The index of the first positive bin
868   		div_type first_positive = (div_min < 0) ? -div_min : 0;
869   		//Resetting if all bins are negative
870   		if(cache_offset + first_positive > cache_end)
871   			first_positive = cache_end - cache_offset;
872   		//Reversing the order of the negative bins
873   		//Note that because of the negative/positive ordering direction flip
874   		//We can not depend upon bin order and positions matching up
875   		//so bin_sizes must be reused to contain the end of the bin
876   		if(first_positive > 0) {
877   			bins[first_positive - 1] = first;
878   			for(int ii = first_positive - 2; ii >= 0; --ii) {
879   				bins[ii] = first + bin_sizes[ii + 1];
880   				bin_sizes[ii] += bin_sizes[ii + 1];
881   			}
882   			//Handling positives following negatives
883   			if((unsigned)first_positive < bin_count) {
884   				bins[first_positive] = first + bin_sizes[0];
885   				bin_sizes[first_positive] += bin_sizes[0];
886   			}
887   		}
888   		else
889   			bins[0] = first;
890   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
891   			bins[u + 1] = first + bin_sizes[u];
892   			bin_sizes[u + 1] += bin_sizes[u];
893   		}
894 
895   		//Swap into place
896   		RandomAccessIter nextbinstart = first;
897   		for(unsigned u = 0; u < bin_count; ++u) {
898   			nextbinstart = first + bin_sizes[u];
899   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
900   		}
901 
902   		//Return if we've completed bucketsorting
903   		if(!log_divisor)
904   			return;
905 
906   		//Handling negative values first
907   		size_t max_count = get_max_count(log_divisor, last - first);
908   		RandomAccessIter lastPos = first;
909   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
910   			size_t count = bin_cache[ii] - lastPos;
911   			if(count < 2)
912   				continue;
913   			if(count < max_count)
914   				std::sort(lastPos, bin_cache[ii]);
915   			//sort negative values using reversed-bin spread_sort
916   			else
917   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
918   		}
919 
920   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
921   			size_t count = bin_cache[u] - lastPos;
922   			if(count < 2)
923   				continue;
924   			if(count < max_count)
925   				std::sort(lastPos, bin_cache[u]);
926   			//sort positive values using normal spread_sort
927   			else
928   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
929   		}
930   	}
931 
932   	template <class RandomAccessIter, class cast_type, class data_type>
933   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,cast_type,data_type)934   	float_Sort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type)
935   	{
936   		std::vector<size_t> bin_sizes;
937   		std::vector<RandomAccessIter> bin_cache;
938   		float_sort_rec<RandomAccessIter, cast_type, data_type>(first, last, bin_cache, 0, bin_sizes);
939   	}
940 
941   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
942   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift)943   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
944   	{
945   		std::vector<size_t> bin_sizes;
946   		std::vector<RandomAccessIter> bin_cache;
947   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
948   	}
949 
950   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
951   	inline void
float_Sort(RandomAccessIter first,RandomAccessIter last,div_type,data_type,right_shift shift,compare comp)952   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
953   	{
954   		std::vector<size_t> bin_sizes;
955   		std::vector<RandomAccessIter> bin_cache;
956   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift, comp);
957   	}
958   }
959 
960   //float_sort with casting
961   //The cast_type must be equal in size to the data type, and must be a signed integer
962   template <class RandomAccessIter, class cast_type>
float_sort_cast(RandomAccessIter first,RandomAccessIter last,cast_type cVal)963   inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal)
964   {
965   	if(last - first < detail::MIN_SORT_SIZE)
966   		std::sort(first, last);
967   	else
968   		detail::float_Sort(first, last, cVal, *first);
969   }
970 
971   //float_sort with casting to an int
972   //Only use this with IEEE floating-point numbers
973   template <class RandomAccessIter>
float_sort_cast_to_int(RandomAccessIter first,RandomAccessIter last)974   inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last)
975   {
976   	int cVal = 0;
977   	float_sort_cast(first, last, cVal);
978   }
979 
980   //float_sort with functors
981   template <class RandomAccessIter, class right_shift>
float_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift)982   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift)
983   {
984   	if(last - first < detail::MIN_SORT_SIZE)
985   		std::sort(first, last);
986   	else
987   		detail::float_Sort(first, last, shift(*first, 0), *first, shift);
988   }
989 
990   template <class RandomAccessIter, class right_shift, class compare>
float_sort(RandomAccessIter first,RandomAccessIter last,right_shift shift,compare comp)991   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift, compare comp)
992   {
993   	if(last - first < detail::MIN_SORT_SIZE)
994   		std::sort(first, last, comp);
995   	else
996   		detail::float_Sort(first, last, shift(*first, 0), *first, shift, comp);
997   }
998 
999   //------------------------------------------------- string_sort source ---------------------------------------------
1000   namespace detail {
1001   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1002   	template<class RandomAccessIter>
1003   	inline void
update_offset(RandomAccessIter first,RandomAccessIter finish,unsigned & char_offset)1004   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset)
1005   	{
1006   		unsigned nextOffset = char_offset;
1007   		bool done = false;
1008   		while(!done) {
1009   			RandomAccessIter curr = first;
1010   			do {
1011   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1012   				if((*curr).size() > char_offset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) {
1013   					done = true;
1014   					break;
1015   				}
1016   			} while(++curr != finish);
1017   			if(!done)
1018   				++nextOffset;
1019   		}
1020   		char_offset = nextOffset;
1021   	}
1022 
1023   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1024   	template<class RandomAccessIter, class get_char, class get_length>
1025   	inline void
update_offset(RandomAccessIter first,RandomAccessIter finish,unsigned & char_offset,get_char getchar,get_length length)1026   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset, get_char getchar, get_length length)
1027   	{
1028   		unsigned nextOffset = char_offset;
1029   		bool done = false;
1030   		while(!done) {
1031   			RandomAccessIter curr = first;
1032   			do {
1033   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1034   				if(length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
1035   					done = true;
1036   					break;
1037   				}
1038   			} while(++curr != finish);
1039   			if(!done)
1040   				++nextOffset;
1041   		}
1042   		char_offset = nextOffset;
1043   	}
1044 
1045   	//A comparison functor for strings that assumes they are identical up to char_offset
1046   	template<class data_type, class unsignedchar_type>
1047   	struct offset_lessthan {
offset_lessthanboost::detail::offset_lessthan1048   		offset_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_lessthan1049   		inline bool operator()(const data_type &x, const data_type &y) const
1050   		{
1051   			unsigned minSize = std::min(x.size(), y.size());
1052   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1053   				if(static_cast<unsignedchar_type>(x[u]) < static_cast<unsignedchar_type>(y[u]))
1054   					return true;
1055   				else if(static_cast<unsignedchar_type>(y[u]) < static_cast<unsignedchar_type>(x[u]))
1056   					return false;
1057   			}
1058   			return x.size() < y.size();
1059   		}
1060   		unsigned fchar_offset;
1061   	};
1062 
1063   	//A comparison functor for strings that assumes they are identical up to char_offset
1064   	template<class data_type, class unsignedchar_type>
1065   	struct offset_greaterthan {
offset_greaterthanboost::detail::offset_greaterthan1066   		offset_greaterthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_greaterthan1067   		inline bool operator()(const data_type &x, const data_type &y) const
1068   		{
1069   			unsigned minSize = std::min(x.size(), y.size());
1070   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1071   				if(static_cast<unsignedchar_type>(x[u]) > static_cast<unsignedchar_type>(y[u]))
1072   					return true;
1073   				else if(static_cast<unsignedchar_type>(y[u]) > static_cast<unsignedchar_type>(x[u]))
1074   					return false;
1075   			}
1076   			return x.size() > y.size();
1077   		}
1078   		unsigned fchar_offset;
1079   	};
1080 
1081   	//A comparison functor for strings that assumes they are identical up to char_offset
1082   	template<class data_type, class get_char, class get_length>
1083   	struct offset_char_lessthan {
offset_char_lessthanboost::detail::offset_char_lessthan1084   		offset_char_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
operator ()boost::detail::offset_char_lessthan1085   		inline bool operator()(const data_type &x, const data_type &y) const
1086   		{
1087   			unsigned minSize = std::min(length(x), length(y));
1088   			for(unsigned u = fchar_offset; u < minSize; ++u) {
1089   				if(getchar(x, u) < getchar(y, u))
1090   					return true;
1091   				else if(getchar(y, u) < getchar(x, u))
1092   					return false;
1093   			}
1094   			return length(x) < length(y);
1095   		}
1096   		unsigned fchar_offset;
1097   		get_char getchar;
1098   		get_length length;
1099   	};
1100 
1101   	//String sorting recursive implementation
1102   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1103   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)1104   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1105   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1106   	{
1107   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1108   		//Iterate to the end of the empties.  If all empty, return
1109   		while((*first).size() <= char_offset) {
1110   			if(++first == last)
1111   				return;
1112   		}
1113   		RandomAccessIter finish = last - 1;
1114   		//Getting the last non-empty
1115   		for(;(*finish).size() <= char_offset; --finish) { }
1116   		++finish;
1117   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1118   		update_offset(first, finish, char_offset);
1119 
1120   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1121   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1122   		const unsigned max_size = bin_count;
1123   		const unsigned membin_count = bin_count + 1;
1124   		unsigned cache_end;
1125   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1126 
1127   		//Calculating the size of each bin; this takes roughly 10% of runtime
1128   		for (RandomAccessIter current = first; current != last; ++current) {
1129   			if((*current).size() <= char_offset) {
1130   				bin_sizes[0]++;
1131   			}
1132   			else
1133   				bin_sizes[static_cast<unsignedchar_type>((*current)[char_offset]) + 1]++;
1134   		}
1135   		//Assign the bin positions
1136   		bin_cache[cache_offset] = first;
1137   		for(unsigned u = 0; u < membin_count - 1; u++)
1138   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1139 
1140   		//Swap into place
1141   		RandomAccessIter nextbinstart = first;
1142   		//handling empty bins
1143   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1144   		nextbinstart +=	bin_sizes[0];
1145   		RandomAccessIter * target_bin;
1146   		//Iterating over each element in the bin of empties
1147   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1148   			//empties belong in this bin
1149   			while((*current).size() > char_offset) {
1150   				target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);
1151   				iter_swap(current, (*target_bin)++);
1152   			}
1153   		}
1154   		*local_bin = nextbinstart;
1155   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1156   		unsigned last_bin = bin_count - 1;
1157   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1158   		//This dominates runtime, mostly in the swap and bin lookups
1159   		for(unsigned u = 0; u < last_bin; ++u) {
1160   			local_bin = bins + u;
1161   			nextbinstart += bin_sizes[u + 1];
1162   			//Iterating over each element in this bin
1163   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1164   				//Swapping elements in current into place until the correct element has been swapped in
1165   				for(target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1166   					target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]))
1167   					iter_swap(current, (*target_bin)++);
1168   			}
1169   			*local_bin = nextbinstart;
1170   		}
1171   		bins[last_bin] = last;
1172   		//Recursing
1173   		RandomAccessIter lastPos = bin_cache[cache_offset];
1174   		//Skip this loop for empties
1175   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1176   			size_t count = bin_cache[u] - lastPos;
1177   			//don't sort unless there are at least two items to compare
1178   			if(count < 2)
1179   				continue;
1180   			//using std::sort if its worst-case is better
1181   			if(count < max_size)
1182   				std::sort(lastPos, bin_cache[u], offset_lessthan<data_type, unsignedchar_type>(char_offset + 1));
1183   			else
1184   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1185   		}
1186   	}
1187 
1188   	//Sorts strings in reverse order, with empties at the end
1189   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1190   	inline void
reverse_string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes)1191   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1192   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1193   	{
1194   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1195   		RandomAccessIter curr = first;
1196   		//Iterate to the end of the empties.  If all empty, return
1197   		while((*curr).size() <= char_offset) {
1198   			if(++curr == last)
1199   				return;
1200   		}
1201   		//Getting the last non-empty
1202   		while((*(--last)).size() <= char_offset) { }
1203   		++last;
1204   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1205   		update_offset(curr, last, char_offset);
1206   		RandomAccessIter * target_bin;
1207 
1208   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1209   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1210   		const unsigned max_size = bin_count;
1211   		const unsigned membin_count = bin_count + 1;
1212   		const unsigned max_bin = bin_count - 1;
1213   		unsigned cache_end;
1214   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1215   		RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
1216 
1217   		//Calculating the size of each bin; this takes roughly 10% of runtime
1218   		for (RandomAccessIter current = first; current != last; ++current) {
1219   			if((*current).size() <= char_offset) {
1220   				bin_sizes[bin_count]++;
1221   			}
1222   			else
1223   				bin_sizes[max_bin - static_cast<unsignedchar_type>((*current)[char_offset])]++;
1224   		}
1225   		//Assign the bin positions
1226   		bin_cache[cache_offset] = first;
1227   		for(unsigned u = 0; u < membin_count - 1; u++)
1228   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1229 
1230   		//Swap into place
1231   		RandomAccessIter nextbinstart = last;
1232   		//handling empty bins
1233   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1234   		RandomAccessIter lastFull = *local_bin;
1235   		//Iterating over each element in the bin of empties
1236   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1237   			//empties belong in this bin
1238   			while((*current).size() > char_offset) {
1239   				target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);
1240   				iter_swap(current, (*target_bin)++);
1241   			}
1242   		}
1243   		*local_bin = nextbinstart;
1244   		nextbinstart = first;
1245   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1246   		unsigned last_bin = max_bin;
1247   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1248   		//This dominates runtime, mostly in the swap and bin lookups
1249   		for(unsigned u = 0; u < last_bin; ++u) {
1250   			local_bin = bins + u;
1251   			nextbinstart += bin_sizes[u];
1252   			//Iterating over each element in this bin
1253   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1254   				//Swapping elements in current into place until the correct element has been swapped in
1255   				for(target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1256   					target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]))
1257   					iter_swap(current, (*target_bin)++);
1258   			}
1259   			*local_bin = nextbinstart;
1260   		}
1261   		bins[last_bin] = lastFull;
1262   		//Recursing
1263   		RandomAccessIter lastPos = first;
1264   		//Skip this loop for empties
1265   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1266   			size_t count = bin_cache[u] - lastPos;
1267   			//don't sort unless there are at least two items to compare
1268   			if(count < 2)
1269   				continue;
1270   			//using std::sort if its worst-case is better
1271   			if(count < max_size)
1272   				std::sort(lastPos, bin_cache[u], offset_greaterthan<data_type, unsignedchar_type>(char_offset + 1));
1273   			else
1274   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1275   		}
1276   	}
1277 
1278   	//String sorting recursive implementation
1279   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length>
1280   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length)1281   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1282   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length)
1283   	{
1284   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1285   		//Iterate to the end of the empties.  If all empty, return
1286   		while(length(*first) <= char_offset) {
1287   			if(++first == last)
1288   				return;
1289   		}
1290   		RandomAccessIter finish = last - 1;
1291   		//Getting the last non-empty
1292   		for(;length(*finish) <= char_offset; --finish) { }
1293   		++finish;
1294   		update_offset(first, finish, char_offset, getchar, length);
1295 
1296   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1297   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1298   		const unsigned max_size = bin_count;
1299   		const unsigned membin_count = bin_count + 1;
1300   		unsigned cache_end;
1301   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1302 
1303   		//Calculating the size of each bin; this takes roughly 10% of runtime
1304   		for (RandomAccessIter current = first; current != last; ++current) {
1305   			if(length(*current) <= char_offset) {
1306   				bin_sizes[0]++;
1307   			}
1308   			else
1309   				bin_sizes[getchar((*current), char_offset) + 1]++;
1310   		}
1311   		//Assign the bin positions
1312   		bin_cache[cache_offset] = first;
1313   		for(unsigned u = 0; u < membin_count - 1; u++)
1314   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1315 
1316   		//Swap into place
1317   		RandomAccessIter nextbinstart = first;
1318   		//handling empty bins
1319   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1320   		nextbinstart +=	bin_sizes[0];
1321   		RandomAccessIter * target_bin;
1322   		//Iterating over each element in the bin of empties
1323   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1324   			//empties belong in this bin
1325   			while(length(*current) > char_offset) {
1326   				target_bin = bins + getchar((*current), char_offset);
1327   				iter_swap(current, (*target_bin)++);
1328   			}
1329   		}
1330   		*local_bin = nextbinstart;
1331   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1332   		unsigned last_bin = bin_count - 1;
1333   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1334   		//This dominates runtime, mostly in the swap and bin lookups
1335   		for(unsigned ii = 0; ii < last_bin; ++ii) {
1336   			local_bin = bins + ii;
1337   			nextbinstart += bin_sizes[ii + 1];
1338   			//Iterating over each element in this bin
1339   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1340   				//Swapping elements in current into place until the correct element has been swapped in
1341   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1342   					target_bin = bins + getchar((*current), char_offset))
1343   					iter_swap(current, (*target_bin)++);
1344   			}
1345   			*local_bin = nextbinstart;
1346   		}
1347   		bins[last_bin] = last;
1348 
1349   		//Recursing
1350   		RandomAccessIter lastPos = bin_cache[cache_offset];
1351   		//Skip this loop for empties
1352   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1353   			size_t count = bin_cache[u] - lastPos;
1354   			//don't sort unless there are at least two items to compare
1355   			if(count < 2)
1356   				continue;
1357   			//using std::sort if its worst-case is better
1358   			if(count < max_size)
1359   				std::sort(lastPos, bin_cache[u], offset_char_lessthan<data_type, get_char, get_length>(char_offset + 1));
1360   			else
1361   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length);
1362   		}
1363   	}
1364 
1365   	//String sorting recursive implementation
1366   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1367   	inline void
string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length,compare comp)1368   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1369   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1370   	{
1371   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1372   		//Iterate to the end of the empties.  If all empty, return
1373   		while(length(*first) <= char_offset) {
1374   			if(++first == last)
1375   				return;
1376   		}
1377   		RandomAccessIter finish = last - 1;
1378   		//Getting the last non-empty
1379   		for(;length(*finish) <= char_offset; --finish) { }
1380   		++finish;
1381   		update_offset(first, finish, char_offset, getchar, length);
1382 
1383   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1384   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1385   		const unsigned max_size = bin_count;
1386   		const unsigned membin_count = bin_count + 1;
1387   		unsigned cache_end;
1388   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1389 
1390   		//Calculating the size of each bin; this takes roughly 10% of runtime
1391   		for (RandomAccessIter current = first; current != last; ++current) {
1392   			if(length(*current) <= char_offset) {
1393   				bin_sizes[0]++;
1394   			}
1395   			else
1396   				bin_sizes[getchar((*current), char_offset) + 1]++;
1397   		}
1398   		//Assign the bin positions
1399   		bin_cache[cache_offset] = first;
1400   		for(unsigned u = 0; u < membin_count - 1; u++)
1401   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1402 
1403   		//Swap into place
1404   		RandomAccessIter nextbinstart = first;
1405   		//handling empty bins
1406   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1407   		nextbinstart +=	bin_sizes[0];
1408   		RandomAccessIter * target_bin;
1409   		//Iterating over each element in the bin of empties
1410   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1411   			//empties belong in this bin
1412   			while(length(*current) > char_offset) {
1413   				target_bin = bins + getchar((*current), char_offset);
1414   				iter_swap(current, (*target_bin)++);
1415   			}
1416   		}
1417   		*local_bin = nextbinstart;
1418   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1419   		unsigned last_bin = bin_count - 1;
1420   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1421   		//This dominates runtime, mostly in the swap and bin lookups
1422   		for(unsigned u = 0; u < last_bin; ++u) {
1423   			local_bin = bins + u;
1424   			nextbinstart += bin_sizes[u + 1];
1425   			//Iterating over each element in this bin
1426   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1427   				//Swapping elements in current into place until the correct element has been swapped in
1428   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1429   					target_bin = bins + getchar((*current), char_offset))
1430   					iter_swap(current, (*target_bin)++);
1431   			}
1432   			*local_bin = nextbinstart;
1433   		}
1434   		bins[last_bin] = last;
1435 
1436   		//Recursing
1437   		RandomAccessIter lastPos = bin_cache[cache_offset];
1438   		//Skip this loop for empties
1439   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1440   			size_t count = bin_cache[u] - lastPos;
1441   			//don't sort unless there are at least two items to compare
1442   			if(count < 2)
1443   				continue;
1444   			//using std::sort if its worst-case is better
1445   			if(count < max_size)
1446   				std::sort(lastPos, bin_cache[u], comp);
1447   			else
1448   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1449   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1450   		}
1451   	}
1452 
1453   	//Sorts strings in reverse order, with empties at the end
1454   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1455   	inline void
reverse_string_sort_rec(RandomAccessIter first,RandomAccessIter last,unsigned char_offset,std::vector<RandomAccessIter> & bin_cache,unsigned cache_offset,std::vector<size_t> & bin_sizes,get_char getchar,get_length length,compare comp)1456   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1457   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1458   	{
1459   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1460   		RandomAccessIter curr = first;
1461   		//Iterate to the end of the empties.  If all empty, return
1462   		while(length(*curr) <= char_offset) {
1463   			if(++curr == last)
1464   				return;
1465   		}
1466   		//Getting the last non-empty
1467   		while(length(*(--last)) <= char_offset) { }
1468   		++last;
1469   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1470   		update_offset(first, last, char_offset, getchar, length);
1471 
1472   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1473   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1474   		const unsigned max_size = bin_count;
1475   		const unsigned membin_count = bin_count + 1;
1476   		const unsigned max_bin = bin_count - 1;
1477   		unsigned cache_end;
1478   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1479   		RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
1480 
1481   		//Calculating the size of each bin; this takes roughly 10% of runtime
1482   		for (RandomAccessIter current = first; current != last; ++current) {
1483   			if(length(*current) <= char_offset) {
1484   				bin_sizes[bin_count]++;
1485   			}
1486   			else
1487   				bin_sizes[max_bin - getchar((*current), char_offset)]++;
1488   		}
1489   		//Assign the bin positions
1490   		bin_cache[cache_offset] = first;
1491   		for(unsigned u = 0; u < membin_count - 1; u++)
1492   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1493 
1494   		//Swap into place
1495   		RandomAccessIter nextbinstart = last;
1496   		//handling empty bins
1497   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1498   		RandomAccessIter lastFull = *local_bin;
1499   		RandomAccessIter * target_bin;
1500   		//Iterating over each element in the bin of empties
1501   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1502   			//empties belong in this bin
1503   			while(length(*current) > char_offset) {
1504   				target_bin = end_bin - getchar((*current), char_offset);
1505   				iter_swap(current, (*target_bin)++);
1506   			}
1507   		}
1508   		*local_bin = nextbinstart;
1509   		nextbinstart = first;
1510   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1511   		unsigned last_bin = max_bin;
1512   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1513   		//This dominates runtime, mostly in the swap and bin lookups
1514   		for(unsigned u = 0; u < last_bin; ++u) {
1515   			local_bin = bins + u;
1516   			nextbinstart += bin_sizes[u];
1517   			//Iterating over each element in this bin
1518   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1519   				//Swapping elements in current into place until the correct element has been swapped in
1520   				for(target_bin = end_bin - getchar((*current), char_offset);  target_bin != local_bin;
1521   					target_bin = end_bin - getchar((*current), char_offset))
1522   					iter_swap(current, (*target_bin)++);
1523   			}
1524   			*local_bin = nextbinstart;
1525   		}
1526   		bins[last_bin] = lastFull;
1527   		//Recursing
1528   		RandomAccessIter lastPos = first;
1529   		//Skip this loop for empties
1530   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1531   			size_t count = bin_cache[u] - lastPos;
1532   			//don't sort unless there are at least two items to compare
1533   			if(count < 2)
1534   				continue;
1535   			//using std::sort if its worst-case is better
1536   			if(count < max_size)
1537   				std::sort(lastPos, bin_cache[u], comp);
1538   			else
1539   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1540   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1541   		}
1542   	}
1543 
1544   	//Holds the bin vector and makes the initial recursive call
1545   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1546   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,data_type,unsignedchar_type)1547   	string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1548   	{
1549   		std::vector<size_t> bin_sizes;
1550   		std::vector<RandomAccessIter> bin_cache;
1551   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1552   	}
1553 
1554   	//Holds the bin vector and makes the initial recursive call
1555   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1556   	inline void
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,data_type,unsignedchar_type)1557   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1558   	{
1559   		std::vector<size_t> bin_sizes;
1560   		std::vector<RandomAccessIter> bin_cache;
1561   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1562   	}
1563 
1564   	//Holds the bin vector and makes the initial recursive call
1565   	template <class RandomAccessIter, class get_char, class get_length, class data_type, class unsignedchar_type>
1566   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,data_type,unsignedchar_type)1567   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, data_type, unsignedchar_type)
1568   	{
1569   		std::vector<size_t> bin_sizes;
1570   		std::vector<RandomAccessIter> bin_cache;
1571   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
1572   	}
1573 
1574   	//Holds the bin vector and makes the initial recursive call
1575   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1576   	inline void
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp,data_type,unsignedchar_type)1577   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1578   	{
1579   		std::vector<size_t> bin_sizes;
1580   		std::vector<RandomAccessIter> bin_cache;
1581   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1582   	}
1583 
1584   	//Holds the bin vector and makes the initial recursive call
1585   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1586   	inline void
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp,data_type,unsignedchar_type)1587   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1588   	{
1589   		std::vector<size_t> bin_sizes;
1590   		std::vector<RandomAccessIter> bin_cache;
1591   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1592   	}
1593   }
1594 
1595   //Allows character-type overloads
1596   template <class RandomAccessIter, class unsignedchar_type>
string_sort(RandomAccessIter first,RandomAccessIter last,unsignedchar_type unused)1597   inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused)
1598   {
1599   	//Don't sort if it's too small to optimize
1600   	if(last - first < detail::MIN_SORT_SIZE)
1601   		std::sort(first, last);
1602   	else
1603   		detail::string_sort(first, last, *first, unused);
1604   }
1605 
1606   //Top-level sorting call; wraps using default of unsigned char
1607   template <class RandomAccessIter>
string_sort(RandomAccessIter first,RandomAccessIter last)1608   inline void string_sort(RandomAccessIter first, RandomAccessIter last)
1609   {
1610   	unsigned char unused = '\0';
1611   	string_sort(first, last, unused);
1612   }
1613 
1614   //Allows character-type overloads
1615   template <class RandomAccessIter, class compare, class unsignedchar_type>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,compare comp,unsignedchar_type unused)1616   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp, unsignedchar_type unused)
1617   {
1618   	//Don't sort if it's too small to optimize
1619   	if(last - first < detail::MIN_SORT_SIZE)
1620   		std::sort(first, last, comp);
1621   	else
1622   		detail::reverse_string_sort(first, last, *first, unused);
1623   }
1624 
1625   //Top-level sorting call; wraps using default of unsigned char
1626   template <class RandomAccessIter, class compare>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,compare comp)1627   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp)
1628   {
1629   	unsigned char unused = '\0';
1630   	reverse_string_sort(first, last, comp, unused);
1631   }
1632 
1633   template <class RandomAccessIter, class get_char, class get_length>
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length)1634   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length)
1635   {
1636   	//Don't sort if it's too small to optimize
1637   	if(last - first < detail::MIN_SORT_SIZE)
1638   		std::sort(first, last);
1639   	else {
1640   		//skipping past empties at the beginning, which allows us to get the character type
1641   		//.empty() is not used so as not to require a user declaration of it
1642   		while(!length(*first)) {
1643   			if(++first == last)
1644   				return;
1645   		}
1646   		detail::string_sort(first, last, getchar, length, *first, getchar((*first), 0));
1647   	}
1648   }
1649 
1650   template <class RandomAccessIter, class get_char, class get_length, class compare>
string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp)1651   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1652   {
1653   	//Don't sort if it's too small to optimize
1654   	if(last - first < detail::MIN_SORT_SIZE)
1655   		std::sort(first, last, comp);
1656   	else {
1657   		//skipping past empties at the beginning, which allows us to get the character type
1658   		//.empty() is not used so as not to require a user declaration of it
1659   		while(!length(*first)) {
1660   			if(++first == last)
1661   				return;
1662   		}
1663   		detail::string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1664   	}
1665   }
1666 
1667   template <class RandomAccessIter, class get_char, class get_length, class compare>
reverse_string_sort(RandomAccessIter first,RandomAccessIter last,get_char getchar,get_length length,compare comp)1668   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1669   {
1670   	//Don't sort if it's too small to optimize
1671   	if(last - first < detail::MIN_SORT_SIZE)
1672   		std::sort(first, last, comp);
1673   	else {
1674   		//skipping past empties at the beginning, which allows us to get the character type
1675   		//.empty() is not used so as not to require a user declaration of it
1676   		while(!length(*(--last))) {
1677   			//Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order
1678   			if(first == last)
1679   				return;
1680   		}
1681   		//making last just after the end of the non-empty part of the array
1682   		++last;
1683   		detail::reverse_string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1684   	}
1685   }
1686 }
1687 
1688 #endif
1689