Path: blob/master/thirdparty/manifold/src/parallel.h
10278 views
// Copyright 2022 The Manifold Authors.1//2// Licensed under the Apache License, Version 2.0 (the "License");3// you may not use this file except in compliance with the License.4// You may obtain a copy of the License at5//6// http://www.apache.org/licenses/LICENSE-2.07//8// Unless required by applicable law or agreed to in writing, software9// distributed under the License is distributed on an "AS IS" BASIS,10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.11// See the License for the specific language governing permissions and12// limitations under the License.13//14// Simple implementation of selected functions in PSTL.15// Iterators must be RandomAccessIterator.1617#pragma once1819#include "iters.h"20#if (MANIFOLD_PAR == 1)21#include <tbb/combinable.h>22#include <tbb/parallel_for.h>23#include <tbb/parallel_invoke.h>24#include <tbb/parallel_reduce.h>25#include <tbb/parallel_scan.h>26#endif27#include <algorithm>28#include <numeric>2930namespace manifold {3132enum class ExecutionPolicy {33Par,34Seq,35};3637constexpr size_t kSeqThreshold = 1e4;38// ExecutionPolicy:39// - Sequential for small workload,40// - Parallel (CPU) for medium workload,41inline constexpr ExecutionPolicy autoPolicy(size_t size,42size_t threshold = kSeqThreshold) {43if (size <= threshold) {44return ExecutionPolicy::Seq;45}46return ExecutionPolicy::Par;47}4849template <typename Iter,50typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>51inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,52size_t threshold = kSeqThreshold) {53if (static_cast<size_t>(std::distance(first, last)) <= threshold) {54return ExecutionPolicy::Seq;55}56return ExecutionPolicy::Par;57}5859template <typename InputIter, typename OutputIter>60void copy(ExecutionPolicy policy, InputIter first, InputIter last,61OutputIter d_first);62template <typename InputIter, typename OutputIter>63void copy(InputIter first, InputIter last, OutputIter d_first);6465#if (MANIFOLD_PAR == 1)66namespace details {67using manifold::kSeqThreshold;68// implementation from69// https://duvanenko.tech.blog/2018/01/14/parallel-merge/70// https://github.com/DragonSpit/ParallelAlgorithms71// note that the ranges are now [p, r) to fit our convention.72template <typename SrcIter, typename DestIter, typename Comp>73void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2,74size_t r2, size_t p3, Comp comp) {75size_t length1 = r1 - p1;76size_t length2 = r2 - p2;77if (length1 < length2) {78std::swap(p1, p2);79std::swap(r1, r2);80std::swap(length1, length2);81}82if (length1 == 0) return;83if (length1 + length2 <= kSeqThreshold) {84std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp);85} else {86size_t q1 = p1 + length1 / 2;87size_t q2 =88std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp));89size_t q3 = p3 + (q1 - p1) + (q2 - p2);90dest[q3] = src[q1];91tbb::parallel_invoke(92[=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); },93[=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); });94}95}9697template <typename SrcIter, typename DestIter, typename Comp>98void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end,99Comp comp) {100size_t numElements = end - begin;101if (numElements <= kSeqThreshold) {102std::copy(src + begin, src + end, dest + begin);103std::stable_sort(dest + begin, dest + end, comp);104} else {105size_t middle = begin + numElements / 2;106tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); },107[=] { mergeSortRec(dest, src, middle, end, comp); });108mergeRec(src, dest, begin, middle, middle, end, begin, comp);109}110}111112template <typename T, typename InputIter, typename OutputIter, typename BinOp>113struct ScanBody {114T sum;115T identity;116BinOp& f;117InputIter input;118OutputIter output;119120ScanBody(T sum, T identity, BinOp& f, InputIter input, OutputIter output)121: sum(sum), identity(identity), f(f), input(input), output(output) {}122ScanBody(ScanBody& b, tbb::split)123: sum(b.identity),124identity(b.identity),125f(b.f),126input(b.input),127output(b.output) {}128template <typename Tag>129void operator()(const tbb::blocked_range<size_t>& r, Tag) {130T temp = sum;131for (size_t i = r.begin(); i < r.end(); ++i) {132T inputTmp = input[i];133if (Tag::is_final_scan()) output[i] = temp;134temp = f(temp, inputTmp);135}136sum = temp;137}138T get_sum() const { return sum; }139void reverse_join(ScanBody& a) { sum = f(a.sum, sum); }140void assign(ScanBody& b) { sum = b.sum; }141};142143template <typename InputIter, typename OutputIter, typename P>144struct CopyIfScanBody {145size_t sum;146P& pred;147InputIter input;148OutputIter output;149150CopyIfScanBody(P& pred, InputIter input, OutputIter output)151: sum(0), pred(pred), input(input), output(output) {}152CopyIfScanBody(CopyIfScanBody& b, tbb::split)153: sum(0), pred(b.pred), input(b.input), output(b.output) {}154template <typename Tag>155void operator()(const tbb::blocked_range<size_t>& r, Tag) {156size_t temp = sum;157for (size_t i = r.begin(); i < r.end(); ++i) {158if (pred(i)) {159temp += 1;160if (Tag::is_final_scan()) output[temp - 1] = input[i];161}162}163sum = temp;164}165size_t get_sum() const { return sum; }166void reverse_join(CopyIfScanBody& a) { sum = a.sum + sum; }167void assign(CopyIfScanBody& b) { sum = b.sum; }168};169170template <typename N, const int K>171struct Hist {172using SizeType = N;173static constexpr int k = K;174N hist[k][256] = {{0}};175void merge(const Hist<N, K>& other) {176for (int i = 0; i < k; ++i)177for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j];178}179void prefixSum(N total, bool* canSkip) {180for (int i = 0; i < k; ++i) {181size_t count = 0;182for (int j = 0; j < 256; ++j) {183N tmp = hist[i][j];184hist[i][j] = count;185count += tmp;186if (tmp == total) canSkip[i] = true;187}188}189}190};191192template <typename T, typename H>193void histogram(T* ptr, typename H::SizeType n, H& hist) {194auto worker = [](T* ptr, typename H::SizeType n, H& hist) {195for (typename H::SizeType i = 0; i < n; ++i)196for (int k = 0; k < hist.k; ++k)197++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF];198};199if (n < kSeqThreshold) {200worker(ptr, n, hist);201} else {202tbb::combinable<H> store;203tbb::parallel_for(204tbb::blocked_range<typename H::SizeType>(0, n, kSeqThreshold),205[&worker, &store, ptr](const auto& r) {206worker(ptr + r.begin(), r.end() - r.begin(), store.local());207});208store.combine_each([&hist](const H& h) { hist.merge(h); });209}210}211212template <typename T, typename H>213void shuffle(T* src, T* target, typename H::SizeType n, H& hist, int k) {214for (typename H::SizeType i = 0; i < n; ++i)215target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i];216}217218template <typename T, typename SizeType>219bool LSB_radix_sort(T* input, T* tmp, SizeType n) {220Hist<SizeType, sizeof(T) / sizeof(char)> hist;221if (std::is_sorted(input, input + n)) return false;222histogram(input, n, hist);223bool canSkip[hist.k] = {0};224hist.prefixSum(n, canSkip);225T *a = input, *b = tmp;226for (int k = 0; k < hist.k; ++k) {227if (!canSkip[k]) {228shuffle(a, b, n, hist, k);229std::swap(a, b);230}231}232return a == tmp;233}234235// LSB radix sort with merge236template <typename T, typename SizeType>237struct SortedRange {238T *input, *tmp;239SizeType offset = 0, length = 0;240bool inTmp = false;241242SortedRange(T* input, T* tmp, SizeType offset = 0, SizeType length = 0)243: input(input), tmp(tmp), offset(offset), length(length) {}244SortedRange(SortedRange<T, SizeType>& r, tbb::split)245: input(r.input), tmp(r.tmp) {}246// FIXME: no idea why thread sanitizer reports data race here247#if defined(__has_feature)248#if __has_feature(thread_sanitizer)249__attribute__((no_sanitize("thread")))250#endif251#endif252void253operator()(const tbb::blocked_range<SizeType>& range) {254SortedRange<T, SizeType> rhs(input, tmp, range.begin(),255range.end() - range.begin());256rhs.inTmp =257LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length);258if (length == 0)259*this = rhs;260else261join(rhs);262}263bool swapBuffer() const {264T *src = input, *target = tmp;265if (inTmp) std::swap(src, target);266copy(src + offset, src + offset + length, target + offset);267return !inTmp;268}269void join(const SortedRange<T, SizeType>& rhs) {270if (inTmp != rhs.inTmp) {271if (length < rhs.length)272inTmp = swapBuffer();273else274rhs.swapBuffer();275}276T *src = input, *target = tmp;277if (inTmp) std::swap(src, target);278if (src[offset + length - 1] > src[rhs.offset]) {279mergeRec(src, target, offset, offset + length, rhs.offset,280rhs.offset + rhs.length, offset, std::less<T>());281inTmp = !inTmp;282}283length += rhs.length;284}285};286287template <typename T, typename SizeTy>288void radix_sort(T* input, SizeTy n) {289T* aux = new T[n];290SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4,291static_cast<SizeTy>(kSeqThreshold / sizeof(T)));292SortedRange<T, SizeTy> result(input, aux);293tbb::parallel_reduce(tbb::blocked_range<SizeTy>(0, n, blockSize), result);294if (result.inTmp) copy(aux, aux + n, input);295delete[] aux;296}297298template <typename Iterator,299typename T = typename std::iterator_traits<Iterator>::value_type,300typename Comp = decltype(std::less<T>())>301void mergeSort(ExecutionPolicy policy, Iterator first, Iterator last,302Comp comp) {303#if (MANIFOLD_PAR == 1)304if (policy == ExecutionPolicy::Par) {305// apparently this prioritizes threads inside here?306tbb::this_task_arena::isolate([&] {307size_t length = std::distance(first, last);308T* tmp = new T[length];309copy(policy, first, last, tmp);310details::mergeSortRec(tmp, first, 0, length, comp);311delete[] tmp;312});313return;314}315#endif316std::stable_sort(first, last, comp);317}318319// stable_sort using merge sort.320//321// For simpler implementation, we do not support types that are not trivially322// destructable.323template <typename Iterator,324typename T = typename std::iterator_traits<Iterator>::value_type,325typename Dummy = void>326struct SortFunctor {327void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {328static_assert(329std::is_convertible_v<330typename std::iterator_traits<Iterator>::iterator_category,331std::random_access_iterator_tag>,332"You can only parallelize RandomAccessIterator.");333static_assert(std::is_trivially_destructible_v<T>,334"Our simple implementation does not support types that are "335"not trivially destructable.");336return mergeSort(policy, first, last, std::less<T>());337}338};339340// stable_sort specialized with radix sort for integral types.341// Typically faster than merge sort.342template <typename Iterator, typename T>343struct SortFunctor<344Iterator, T,345std::enable_if_t<346std::is_integral_v<T> &&347std::is_pointer_v<typename std::iterator_traits<Iterator>::pointer>>> {348void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {349static_assert(350std::is_convertible_v<351typename std::iterator_traits<Iterator>::iterator_category,352std::random_access_iterator_tag>,353"You can only parallelize RandomAccessIterator.");354static_assert(std::is_trivially_destructible_v<T>,355"Our simple implementation does not support types that are "356"not trivially destructable.");357#if (MANIFOLD_PAR == 1)358if (policy == ExecutionPolicy::Par) {359radix_sort(&*first, static_cast<size_t>(std::distance(first, last)));360return;361}362#endif363stable_sort(policy, first, last, std::less<T>());364}365};366367} // namespace details368369#endif370371// Applies the function `f` to each element in the range `[first, last)`372template <typename Iter, typename F>373void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) {374static_assert(std::is_convertible_v<375typename std::iterator_traits<Iter>::iterator_category,376std::random_access_iterator_tag>,377"You can only parallelize RandomAccessIterator.");378(void)policy;379#if (MANIFOLD_PAR == 1)380if (policy == ExecutionPolicy::Par) {381tbb::this_task_arena::isolate([&]() {382tbb::parallel_for(tbb::blocked_range<Iter>(first, last),383[&f](const tbb::blocked_range<Iter>& range) {384for (Iter i = range.begin(); i != range.end(); i++)385f(*i);386});387});388return;389}390#endif391std::for_each(first, last, f);392}393394// Applies the function `f` to each element in the range `[first, last)`395template <typename Iter, typename F>396void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) {397static_assert(std::is_convertible_v<398typename std::iterator_traits<Iter>::iterator_category,399std::random_access_iterator_tag>,400"You can only parallelize RandomAccessIterator.");401for_each(policy, first, first + n, f);402}403404// Reduce the range `[first, last)` using a binary operation `f` with an initial405// value `init`.406//407// The binary operation should be commutative and associative. Otherwise, the408// result is non-deterministic.409template <typename InputIter, typename BinaryOp,410typename T = typename std::iterator_traits<InputIter>::value_type>411T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init,412BinaryOp f) {413static_assert(std::is_convertible_v<414typename std::iterator_traits<InputIter>::iterator_category,415std::random_access_iterator_tag>,416"You can only parallelize RandomAccessIterator.");417(void)policy;418#if (MANIFOLD_PAR == 1)419if (policy == ExecutionPolicy::Par) {420// should we use deterministic reduce here?421return tbb::this_task_arena::isolate([&]() {422return tbb::parallel_reduce(423tbb::blocked_range<InputIter>(first, last, details::kSeqThreshold),424init,425[&f](const tbb::blocked_range<InputIter>& range, T value) {426return std::reduce(range.begin(), range.end(), value, f);427},428f);429});430}431#endif432return std::reduce(first, last, init, f);433}434435// Reduce the range `[first, last)` using a binary operation `f` with an initial436// value `init`.437//438// The binary operation should be commutative and associative. Otherwise, the439// result is non-deterministic.440template <typename InputIter, typename BinaryOp,441typename T = typename std::iterator_traits<InputIter>::value_type>442T reduce(InputIter first, InputIter last, T init, BinaryOp f) {443return reduce(autoPolicy(first, last, 1e5), first, last, init, f);444}445446// Transform and reduce the range `[first, last)` by first applying a unary447// function `g`, and then combining the results using a binary operation `f`448// with an initial value `init`.449//450// The binary operation should be commutative and associative. Otherwise, the451// result is non-deterministic.452template <typename InputIter, typename BinaryOp, typename UnaryOp,453typename T = std::invoke_result_t<454UnaryOp, typename std::iterator_traits<InputIter>::value_type>>455T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last,456T init, BinaryOp f, UnaryOp g) {457return reduce(policy, TransformIterator(first, g), TransformIterator(last, g),458init, f);459}460461// Transform and reduce the range `[first, last)` by first applying a unary462// function `g`, and then combining the results using a binary operation `f`463// with an initial value `init`.464//465// The binary operation should be commutative and associative. Otherwise, the466// result is non-deterministic.467template <typename InputIter, typename BinaryOp, typename UnaryOp,468typename T = std::invoke_result_t<469UnaryOp, typename std::iterator_traits<InputIter>::value_type>>470T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f,471UnaryOp g) {472return manifold::reduce(TransformIterator(first, g),473TransformIterator(last, g), init, f);474}475476// Compute the inclusive prefix sum for the range `[first, last)`477// using the summation operator, and store the result in the range478// starting from `d_first`.479//480// The input range `[first, last)` and481// the output range `[d_first, d_first + last - first)`482// must be equal or non-overlapping.483template <typename InputIter, typename OutputIter,484typename T = typename std::iterator_traits<InputIter>::value_type>485void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,486OutputIter d_first) {487static_assert(std::is_convertible_v<488typename std::iterator_traits<InputIter>::iterator_category,489std::random_access_iterator_tag>,490"You can only parallelize RandomAccessIterator.");491static_assert(492std::is_convertible_v<493typename std::iterator_traits<OutputIter>::iterator_category,494std::random_access_iterator_tag>,495"You can only parallelize RandomAccessIterator.");496(void)policy;497#if (MANIFOLD_PAR == 1)498if (policy == ExecutionPolicy::Par) {499tbb::this_task_arena::isolate([&]() {500tbb::parallel_scan(501tbb::blocked_range<size_t>(0, std::distance(first, last)),502static_cast<T>(0),503[&](const tbb::blocked_range<size_t>& range, T sum,504bool is_final_scan) {505T temp = sum;506for (size_t i = range.begin(); i < range.end(); ++i) {507temp = temp + first[i];508if (is_final_scan) d_first[i] = temp;509}510return temp;511},512std::plus<T>());513});514return;515}516#endif517std::inclusive_scan(first, last, d_first);518}519520// Compute the inclusive prefix sum for the range `[first, last)` using the521// summation operator, and store the result in the range522// starting from `d_first`.523//524// The input range `[first, last)` and525// the output range `[d_first, d_first + last - first)`526// must be equal or non-overlapping.527template <typename InputIter, typename OutputIter,528typename T = typename std::iterator_traits<InputIter>::value_type>529void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) {530return inclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first);531}532533// Compute the inclusive prefix sum for the range `[first, last)` using the534// binary operator `f`, with initial value `init` and535// identity element `identity`, and store the result in the range536// starting from `d_first`.537//538// This is different from `exclusive_scan` in the sequential algorithm by539// requiring an identity element. This is needed so that each block can be540// scanned in parallel and combined later.541//542// The input range `[first, last)` and543// the output range `[d_first, d_first + last - first)`544// must be equal or non-overlapping.545template <typename InputIter, typename OutputIter,546typename BinOp = decltype(std::plus<typename std::iterator_traits<547InputIter>::value_type>()),548typename T = typename std::iterator_traits<InputIter>::value_type>549void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,550OutputIter d_first, T init = static_cast<T>(0),551BinOp f = std::plus<T>(), T identity = static_cast<T>(0)) {552static_assert(std::is_convertible_v<553typename std::iterator_traits<InputIter>::iterator_category,554std::random_access_iterator_tag>,555"You can only parallelize RandomAccessIterator.");556static_assert(557std::is_convertible_v<558typename std::iterator_traits<OutputIter>::iterator_category,559std::random_access_iterator_tag>,560"You can only parallelize RandomAccessIterator.");561(void)policy;562(void)identity;563#if (MANIFOLD_PAR == 1)564if (policy == ExecutionPolicy::Par) {565details::ScanBody<T, InputIter, OutputIter, BinOp> body(init, identity, f,566first, d_first);567tbb::this_task_arena::isolate([&]() {568tbb::parallel_scan(569tbb::blocked_range<size_t>(0, std::distance(first, last)), body);570});571return;572}573#endif574std::exclusive_scan(first, last, d_first, init, f);575}576577// Compute the inclusive prefix sum for the range `[first, last)` using the578// binary operator `f`, with initial value `init` and579// identity element `identity`, and store the result in the range580// starting from `d_first`.581//582// This is different from `exclusive_scan` in the sequential algorithm by583// requiring an identity element. This is needed so that each block can be584// scanned in parallel and combined later.585//586// The input range `[first, last)` and587// the output range `[d_first, d_first + last - first)`588// must be equal or non-overlapping.589template <typename InputIter, typename OutputIter,590typename BinOp = decltype(std::plus<typename std::iterator_traits<591InputIter>::value_type>()),592typename T = typename std::iterator_traits<InputIter>::value_type>593void exclusive_scan(InputIter first, InputIter last, OutputIter d_first,594T init = static_cast<T>(0), BinOp f = std::plus<T>(),595T identity = static_cast<T>(0)) {596exclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first, init, f,597identity);598}599600// Apply function `f` on the input range `[first, last)` and store the result in601// the range starting from `d_first`.602//603// The input range `[first, last)` and604// the output range `[d_first, d_first + last - first)`605// must be equal or non-overlapping.606template <typename InputIter, typename OutputIter, typename F>607void transform(ExecutionPolicy policy, InputIter first, InputIter last,608OutputIter d_first, F f) {609static_assert(std::is_convertible_v<610typename std::iterator_traits<InputIter>::iterator_category,611std::random_access_iterator_tag>,612"You can only parallelize RandomAccessIterator.");613static_assert(614std::is_convertible_v<615typename std::iterator_traits<OutputIter>::iterator_category,616std::random_access_iterator_tag>,617"You can only parallelize RandomAccessIterator.");618(void)policy;619#if (MANIFOLD_PAR == 1)620if (policy == ExecutionPolicy::Par) {621tbb::this_task_arena::isolate([&]() {622tbb::parallel_for(tbb::blocked_range<size_t>(6230, static_cast<size_t>(std::distance(first, last))),624[&](const tbb::blocked_range<size_t>& range) {625std::transform(first + range.begin(),626first + range.end(),627d_first + range.begin(), f);628});629});630return;631}632#endif633std::transform(first, last, d_first, f);634}635636// Apply function `f` on the input range `[first, last)` and store the result in637// the range starting from `d_first`.638//639// The input range `[first, last)` and640// the output range `[d_first, d_first + last - first)`641// must be equal or non-overlapping.642template <typename InputIter, typename OutputIter, typename F>643void transform(InputIter first, InputIter last, OutputIter d_first, F f) {644transform(autoPolicy(first, last, 1e5), first, last, d_first, f);645}646647// Copy the input range `[first, last)` to the output range648// starting from `d_first`.649//650// The input range `[first, last)` and651// the output range `[d_first, d_first + last - first)`652// must not overlap.653template <typename InputIter, typename OutputIter>654void copy(ExecutionPolicy policy, InputIter first, InputIter last,655OutputIter d_first) {656static_assert(std::is_convertible_v<657typename std::iterator_traits<InputIter>::iterator_category,658std::random_access_iterator_tag>,659"You can only parallelize RandomAccessIterator.");660static_assert(661std::is_convertible_v<662typename std::iterator_traits<OutputIter>::iterator_category,663std::random_access_iterator_tag>,664"You can only parallelize RandomAccessIterator.");665(void)policy;666#if (MANIFOLD_PAR == 1)667if (policy == ExecutionPolicy::Par) {668tbb::this_task_arena::isolate([&]() {669tbb::parallel_for(tbb::blocked_range<size_t>(6700, static_cast<size_t>(std::distance(first, last)),671details::kSeqThreshold),672[&](const tbb::blocked_range<size_t>& range) {673std::copy(first + range.begin(), first + range.end(),674d_first + range.begin());675});676});677return;678}679#endif680std::copy(first, last, d_first);681}682683// Copy the input range `[first, last)` to the output range684// starting from `d_first`.685//686// The input range `[first, last)` and687// the output range `[d_first, d_first + last - first)`688// must not overlap.689template <typename InputIter, typename OutputIter>690void copy(InputIter first, InputIter last, OutputIter d_first) {691copy(autoPolicy(first, last, 1e6), first, last, d_first);692}693694// Copy the input range `[first, first + n)` to the output range695// starting from `d_first`.696//697// The input range `[first, first + n)` and698// the output range `[d_first, d_first + n)`699// must not overlap.700template <typename InputIter, typename OutputIter>701void copy_n(ExecutionPolicy policy, InputIter first, size_t n,702OutputIter d_first) {703copy(policy, first, first + n, d_first);704}705706// Copy the input range `[first, first + n)` to the output range707// starting from `d_first`.708//709// The input range `[first, first + n)` and710// the output range `[d_first, d_first + n)`711// must not overlap.712template <typename InputIter, typename OutputIter>713void copy_n(InputIter first, size_t n, OutputIter d_first) {714copy(autoPolicy(n, 1e6), first, first + n, d_first);715}716717// Fill the range `[first, last)` with `value`.718template <typename OutputIter, typename T>719void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) {720static_assert(721std::is_convertible_v<722typename std::iterator_traits<OutputIter>::iterator_category,723std::random_access_iterator_tag>,724"You can only parallelize RandomAccessIterator.");725(void)policy;726#if (MANIFOLD_PAR == 1)727if (policy == ExecutionPolicy::Par) {728tbb::this_task_arena::isolate([&]() {729tbb::parallel_for(tbb::blocked_range<OutputIter>(first, last),730[&](const tbb::blocked_range<OutputIter>& range) {731std::fill(range.begin(), range.end(), value);732});733});734return;735}736#endif737std::fill(first, last, value);738}739740// Fill the range `[first, last)` with `value`.741template <typename OutputIter, typename T>742void fill(OutputIter first, OutputIter last, T value) {743fill(autoPolicy(first, last, 5e5), first, last, value);744}745746// Count the number of elements in the input range `[first, last)` satisfying747// predicate `pred`, i.e. `pred(x) == true`.748template <typename InputIter, typename P>749size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last,750P pred) {751(void)policy;752#if (MANIFOLD_PAR == 1)753if (policy == ExecutionPolicy::Par) {754return reduce(policy, TransformIterator(first, pred),755TransformIterator(last, pred), 0, std::plus<size_t>());756}757#endif758return std::count_if(first, last, pred);759}760761// Count the number of elements in the input range `[first, last)` satisfying762// predicate `pred`, i.e. `pred(x) == true`.763template <typename InputIter, typename P>764size_t count_if(InputIter first, InputIter last, P pred) {765return count_if(autoPolicy(first, last, 1e4), first, last, pred);766}767768// Check if all elements in the input range `[first, last)` satisfy769// predicate `pred`, i.e. `pred(x) == true`.770template <typename InputIter, typename P>771bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) {772static_assert(std::is_convertible_v<773typename std::iterator_traits<InputIter>::iterator_category,774std::random_access_iterator_tag>,775"You can only parallelize RandomAccessIterator.");776(void)policy;777#if (MANIFOLD_PAR == 1)778if (policy == ExecutionPolicy::Par) {779// should we use deterministic reduce here?780return tbb::this_task_arena::isolate([&]() {781return tbb::parallel_reduce(782tbb::blocked_range<InputIter>(first, last), true,783[&](const tbb::blocked_range<InputIter>& range, bool value) {784if (!value) return false;785for (InputIter i = range.begin(); i != range.end(); i++)786if (!pred(*i)) return false;787return true;788},789[](bool a, bool b) { return a && b; });790});791}792#endif793return std::all_of(first, last, pred);794}795796// Check if all elements in the input range `[first, last)` satisfy797// predicate `pred`, i.e. `pred(x) == true`.798template <typename InputIter, typename P>799bool all_of(InputIter first, InputIter last, P pred) {800return all_of(autoPolicy(first, last, 1e5), first, last, pred);801}802803// Copy values in the input range `[first, last)` to the output range804// starting from `d_first` that satisfies the predicate `pred`,805// i.e. `pred(x) == true`, and returns `d_first + n` where `n` is the number of806// times the predicate is evaluated to true.807//808// This function is stable, meaning that the relative order of elements in the809// output range remains unchanged.810//811// The input range `[first, last)` and812// the output range `[d_first, d_first + last - first)`813// must not overlap.814template <typename InputIter, typename OutputIter, typename P>815OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last,816OutputIter d_first, P pred) {817static_assert(std::is_convertible_v<818typename std::iterator_traits<InputIter>::iterator_category,819std::random_access_iterator_tag>,820"You can only parallelize RandomAccessIterator.");821static_assert(822std::is_convertible_v<823typename std::iterator_traits<OutputIter>::iterator_category,824std::random_access_iterator_tag>,825"You can only parallelize RandomAccessIterator.");826(void)policy;827#if (MANIFOLD_PAR == 1)828if (policy == ExecutionPolicy::Par) {829auto pred2 = [&](size_t i) { return pred(first[i]); };830details::CopyIfScanBody body(pred2, first, d_first);831tbb::this_task_arena::isolate([&]() {832tbb::parallel_scan(833tbb::blocked_range<size_t>(0, std::distance(first, last)), body);834return d_first + body.get_sum();835});836}837#endif838return std::copy_if(first, last, d_first, pred);839}840841// Copy values in the input range `[first, last)` to the output range842// starting from `d_first` that satisfies the predicate `pred`, i.e. `pred(x) ==843// true`, and returns `d_first + n` where `n` is the number of times the844// predicate is evaluated to true.845//846// This function is stable, meaning that the relative order of elements in the847// output range remains unchanged.848//849// The input range `[first, last)` and850// the output range `[d_first, d_first + last - first)`851// must not overlap.852template <typename InputIter, typename OutputIter, typename P>853OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first,854P pred) {855return copy_if(autoPolicy(first, last, 1e5), first, last, d_first, pred);856}857858// Remove values in the input range `[first, last)` that satisfies859// the predicate `pred`, i.e. `pred(x) == true`, and returns `first + n`860// where `n` is the number of times the predicate is evaluated to false.861//862// This function is stable, meaning that the relative order of elements that863// remained are unchanged.864//865// Only trivially destructable types are supported.866template <typename Iter, typename P,867typename T = typename std::iterator_traits<Iter>::value_type>868Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) {869static_assert(std::is_convertible_v<870typename std::iterator_traits<Iter>::iterator_category,871std::random_access_iterator_tag>,872"You can only parallelize RandomAccessIterator.");873static_assert(std::is_trivially_destructible_v<T>,874"Our simple implementation does not support types that are "875"not trivially destructable.");876(void)policy;877#if (MANIFOLD_PAR == 1)878if (policy == ExecutionPolicy::Par) {879T* tmp = new T[std::distance(first, last)];880auto back =881copy_if(policy, first, last, tmp, [&](T v) { return !pred(v); });882copy(policy, tmp, back, first);883auto d = std::distance(tmp, back);884delete[] tmp;885return first + d;886}887#endif888return std::remove_if(first, last, pred);889}890891// Remove values in the input range `[first, last)` that satisfies892// the predicate `pred`, i.e. `pred(x) == true`, and893// returns `first + n` where `n` is the number of times the predicate is894// evaluated to false.895//896// This function is stable, meaning that the relative order of elements that897// remained are unchanged.898//899// Only trivially destructable types are supported.900template <typename Iter, typename P,901typename T = typename std::iterator_traits<Iter>::value_type>902Iter remove_if(Iter first, Iter last, P pred) {903return remove_if(autoPolicy(first, last, 1e4), first, last, pred);904}905906// Remove values in the input range `[first, last)` that are equal to `value`.907// Returns `first + n` where `n` is the number of values908// that are not equal to `value`.909//910// This function is stable, meaning that the relative order of elements that911// remained are unchanged.912//913// Only trivially destructable types are supported.914template <typename Iter,915typename T = typename std::iterator_traits<Iter>::value_type>916Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) {917static_assert(std::is_convertible_v<918typename std::iterator_traits<Iter>::iterator_category,919std::random_access_iterator_tag>,920"You can only parallelize RandomAccessIterator.");921static_assert(std::is_trivially_destructible_v<T>,922"Our simple implementation does not support types that are "923"not trivially destructable.");924(void)policy;925#if (MANIFOLD_PAR == 1)926if (policy == ExecutionPolicy::Par) {927T* tmp = new T[std::distance(first, last)];928auto back =929copy_if(policy, first, last, tmp, [&](T v) { return v != value; });930copy(policy, tmp, back, first);931auto d = std::distance(tmp, back);932delete[] tmp;933return first + d;934}935#endif936return std::remove(first, last, value);937}938939// Remove values in the input range `[first, last)` that are equal to `value`.940// Returns `first + n` where `n` is the number of values941// that are not equal to `value`.942//943// This function is stable, meaning that the relative order of elements that944// remained are unchanged.945//946// Only trivially destructable types are supported.947template <typename Iter,948typename T = typename std::iterator_traits<Iter>::value_type>949Iter remove(Iter first, Iter last, T value) {950return remove(autoPolicy(first, last, 1e4), first, last, value);951}952953// For each group of consecutive elements in the range `[first, last)` with the954// same value, unique removes all but the first element of the group. The return955// value is an iterator `new_last` such that no two consecutive elements in the956// range `[first, new_last)` are equal.957//958// This function is stable, meaning that the relative order of elements that959// remained are unchanged.960//961// Only trivially destructable types are supported.962template <typename Iter,963typename T = typename std::iterator_traits<Iter>::value_type>964Iter unique(ExecutionPolicy policy, Iter first, Iter last) {965static_assert(std::is_convertible_v<966typename std::iterator_traits<Iter>::iterator_category,967std::random_access_iterator_tag>,968"You can only parallelize RandomAccessIterator.");969static_assert(std::is_trivially_destructible_v<T>,970"Our simple implementation does not support types that are "971"not trivially destructable.");972(void)policy;973#if (MANIFOLD_PAR == 1)974if (policy == ExecutionPolicy::Par && first != last) {975Iter newSrcStart = first;976// cap the maximum buffer size, proved to be beneficial for unique with huge977// array size978constexpr size_t MAX_BUFFER_SIZE = 1 << 16;979T* tmp = new T[std::min(MAX_BUFFER_SIZE,980static_cast<size_t>(std::distance(first, last)))];981auto pred = [&](size_t i) { return tmp[i] != tmp[i + 1]; };982do {983size_t length =984std::min(MAX_BUFFER_SIZE,985static_cast<size_t>(std::distance(newSrcStart, last)));986copy(policy, newSrcStart, newSrcStart + length, tmp);987*first = *newSrcStart;988// this is not a typo, the index i is offset by 1, so to compare an989// element with its predecessor we need to compare i and i + 1.990details::CopyIfScanBody body(pred, tmp + 1, first + 1);991tbb::this_task_arena::isolate([&]() {992tbb::parallel_scan(tbb::blocked_range<size_t>(0, length - 1), body);993});994first += body.get_sum() + 1;995newSrcStart += length;996} while (newSrcStart != last);997delete[] tmp;998return first;999}1000#endif1001return std::unique(first, last);1002}10031004// For each group of consecutive elements in the range `[first, last)` with the1005// same value, unique removes all but the first element of the group. The return1006// value is an iterator `new_last` such that no two consecutive elements in the1007// range `[first, new_last)` are equal.1008//1009// This function is stable, meaning that the relative order of elements that1010// remained are unchanged.1011//1012// Only trivially destructable types are supported.1013template <typename Iter,1014typename T = typename std::iterator_traits<Iter>::value_type>1015Iter unique(Iter first, Iter last) {1016return unique(autoPolicy(first, last, 1e4), first, last);1017}10181019// Sort the input range `[first, last)` in ascending order.1020//1021// This function is stable, meaning that the relative order of elements that are1022// incomparable remains unchanged.1023//1024// Only trivially destructable types are supported.1025template <typename Iterator,1026typename T = typename std::iterator_traits<Iterator>::value_type>1027void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) {1028(void)policy;1029#if (MANIFOLD_PAR == 1)1030details::SortFunctor<Iterator, T>()(policy, first, last);1031#else1032std::stable_sort(first, last);1033#endif1034}10351036// Sort the input range `[first, last)` in ascending order.1037//1038// This function is stable, meaning that the relative order of elements that are1039// incomparable remains unchanged.1040//1041// Only trivially destructable types are supported.1042template <typename Iterator,1043typename T = typename std::iterator_traits<Iterator>::value_type>1044void stable_sort(Iterator first, Iterator last) {1045stable_sort(autoPolicy(first, last, 1e4), first, last);1046}10471048// Sort the input range `[first, last)` in ascending order using the comparison1049// function `comp`.1050//1051// This function is stable, meaning that the relative order of elements that are1052// incomparable remains unchanged.1053//1054// Only trivially destructable types are supported.1055template <typename Iterator,1056typename T = typename std::iterator_traits<Iterator>::value_type,1057typename Comp = decltype(std::less<T>())>1058void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last,1059Comp comp) {1060(void)policy;1061#if (MANIFOLD_PAR == 1)1062details::mergeSort(policy, first, last, comp);1063#else1064std::stable_sort(first, last, comp);1065#endif1066}10671068// Sort the input range `[first, last)` in ascending order using the comparison1069// function `comp`.1070//1071// This function is stable, meaning that the relative order of elements that are1072// incomparable remains unchanged.1073//1074// Only trivially destructable types are supported.1075template <typename Iterator,1076typename T = typename std::iterator_traits<Iterator>::value_type,1077typename Comp = decltype(std::less<T>())>1078void stable_sort(Iterator first, Iterator last, Comp comp) {1079stable_sort(autoPolicy(first, last, 1e4), first, last, comp);1080}10811082// `scatter` copies elements from a source range into an output array according1083// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`1084// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears1085// more than once in the range `[mapFirst, mapFirst + (last - first))`, the1086// result is undefined.1087//1088// The map range, input range and the output range must not overlap.1089template <typename InputIterator1, typename InputIterator2,1090typename OutputIterator>1091void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last,1092InputIterator2 mapFirst, OutputIterator outputFirst) {1093for_each(policy, countAt(0),1094countAt(static_cast<size_t>(std::distance(first, last))),1095[first, mapFirst, outputFirst](size_t i) {1096outputFirst[mapFirst[i]] = first[i];1097});1098}10991100// `scatter` copies elements from a source range into an output array according1101// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`1102// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears1103// more than once in the range `[mapFirst, mapFirst + (last - first))`,1104// the result is undefined.1105//1106// The map range, input range and the output range must not overlap.1107template <typename InputIterator1, typename InputIterator2,1108typename OutputIterator>1109void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst,1110OutputIterator outputFirst) {1111scatter(autoPolicy(first, last, 1e5), first, last, mapFirst, outputFirst);1112}11131114// `gather` copies elements from a source array into a destination range1115// according to a map. For each input iterator `i`1116// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`1117// is assigned to `outputFirst[i - map_first]`.1118//1119// The map range, input range and the output range must not overlap.1120template <typename InputIterator, typename RandomAccessIterator,1121typename OutputIterator>1122void gather(ExecutionPolicy policy, InputIterator mapFirst,1123InputIterator mapLast, RandomAccessIterator inputFirst,1124OutputIterator outputFirst) {1125for_each(policy, countAt(0),1126countAt(static_cast<size_t>(std::distance(mapFirst, mapLast))),1127[mapFirst, inputFirst, outputFirst](size_t i) {1128outputFirst[i] = inputFirst[mapFirst[i]];1129});1130}11311132// `gather` copies elements from a source array into a destination range1133// according to a map. For each input iterator `i`1134// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`1135// is assigned to `outputFirst[i - map_first]`.1136//1137// The map range, input range and the output range must not overlap.1138template <typename InputIterator, typename RandomAccessIterator,1139typename OutputIterator>1140void gather(InputIterator mapFirst, InputIterator mapLast,1141RandomAccessIterator inputFirst, OutputIterator outputFirst) {1142gather(autoPolicy(std::distance(mapFirst, mapLast), 1e5), mapFirst, mapLast,1143inputFirst, outputFirst);1144}11451146// Write `[0, last - first)` to the range `[first, last)`.1147template <typename Iterator>1148void sequence(ExecutionPolicy policy, Iterator first, Iterator last) {1149for_each(policy, countAt(0),1150countAt(static_cast<size_t>(std::distance(first, last))),1151[first](size_t i) { first[i] = i; });1152}11531154// Write `[0, last - first)` to the range `[first, last)`.1155template <typename Iterator>1156void sequence(Iterator first, Iterator last) {1157sequence(autoPolicy(first, last, 1e5), first, last);1158}11591160} // namespace manifold116111621163