rayon/slice/
mod.rs

//! Parallel iterator types for [slices]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! [slices]: std::slice
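//!
//! For example, the parallel iterator over a shared slice is the [`Iter`] type
//! in this module. A minimal sketch of naming that type explicitly (the
//! variable names are only illustrative):
//!
//! ```
//! use rayon::prelude::*;
//!
//! let data: &[i32] = &[1, 2, 3];
//! // `<&[T]>::into_par_iter()` yields the concrete type `rayon::slice::Iter<'_, T>`.
//! let iter: rayon::slice::Iter<'_, i32> = data.into_par_iter();
//! assert_eq!(iter.count(), 3);
//! ```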

mod chunk_by;
mod chunks;
mod rchunks;
mod sort;

mod test;

use self::sort::par_mergesort;
use self::sort::par_quicksort;
use crate::iter::plumbing::*;
use crate::iter::*;
use crate::split_producer::*;

use std::cmp::Ordering;
use std::fmt::{self, Debug};

pub use self::chunk_by::{ChunkBy, ChunkByMut};
pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
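    ///
    /// The `impl` for `[T]` below simply returns `self`, so this is essentially
    /// an identity conversion; a small illustrative doctest:
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let slice: &[i32] = &[1, 2, 3];
    /// assert_eq!(slice.as_parallel_slice(), &[1, 2, 3]);
    /// ```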
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping runs
    /// of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of adjacent elements: it is called on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
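    ///
    /// The `impl` for `[T]` below simply returns `self`, so this is mainly a
    /// hook for the other `par_*` methods; a small illustrative doctest:
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut v = vec![1, 2, 3];
    /// v.as_parallel_slice_mut().reverse();
    /// assert_eq!(v, [3, 2, 1]);
    /// ```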
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive: `a < b` and `b < c` together imply `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
                indices.par_sort_unstable();
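                // Apply the sorted permutation in place: `indices[i].1` is the original
                // position of the element that belongs at position `i`. Earlier swaps may
                // already have moved that element, so follow the recorded redirections
                // until we find where it currently lives, then swap it into place.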
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

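        // Pick the smallest index type that can address `len` elements *and*
        // actually shrinks the `(K, index)` pair (alignment of `K` may make a
        // smaller integer pointless), then sort with that type.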
        let sz_u8 = size_of::<(K, u8)>();
        let sz_u16 = size_of::<(K, u16)>();
        let sz_u32 = size_of::<(K, u32)>();
        let sz_usize = size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive: `a < b` and `b < c` together imply `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping mutable
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of adjacent elements: it is called on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Sync> IntoParallelIterator for &'data Box<[T]> {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

impl<'data, T: Send> IntoParallelIterator for &'data mut Box<[T]> {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T> {
    slice: &'data [T],
}

impl<T> Clone for Iter<'_, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<T: Sync> IndexedParallelIterator for Iter<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type Item = &'data T;
    type IntoIter = ::std::slice::Iter<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T> {
    window_size: usize,
    slice: &'data [T],
}

impl<T> Clone for Windows<'_, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<T: Sync> IndexedParallelIterator for Windows<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        assert!(self.window_size >= 1);
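        // A slice of length `n` has `n - window_size + 1` complete windows;
        // saturate to zero when the slice is shorter than a single window.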
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type Item = &'data [T];
    type IntoIter = ::std::slice::Windows<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

    fn split_at(self, index: usize) -> (Self, Self) {
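        // Overlap the two halves by `window_size - 1` elements so that the left
        // half still yields the windows starting before `index`, while the right
        // half starts its windows at `index`; together they produce every window
        // exactly once.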
        let left_index = Ord::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T> {
    slice: &'data mut [T],
}

impl<'data, T: Send> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<T: Send> IndexedParallelIterator for IterMut<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type Item = &'data mut T;
    type IntoIter = ::std::slice::IterMut<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<T, P: Clone> Clone for Split<'_, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<T: Debug, P> Debug for Split<'_, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<T, P: Clone> Clone for SplitInclusive<'_, T, P> {
    fn clone(&self) -> Self {
        SplitInclusive {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<T: Debug, P> Debug for SplitInclusive<'_, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusive")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<T, P> Fissile<P> for &[T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at(index + 1)
        } else {
            let (left, right) = self.split_at(index);
            (left, &right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive(separator))
        } else {
            let mut split = self.split(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<T: Debug, P> Debug for SplitMut<'_, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<T: Debug, P> Debug for SplitInclusiveMut<'_, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusiveMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<T, P> Fissile<P> for &mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at_mut(index + 1)
        } else {
            let (left, right) = self.split_at_mut(index);
            (left, &mut right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive_mut(separator))
        } else {
            let mut split = self.split_mut(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}