CoCalc -- worker_thread

GitHub Repository: godotengine/godot
Path: blob/master/core/object/worker_thread_pool.cpp
¹⁰²⁷⁷ views
1
/**************************************************************************/
2
/*  worker_thread_pool.cpp                                                */
3
/**************************************************************************/
4
/*                         This file is part of:                          */
5
/*                             GODOT ENGINE                               */
6
/*                        https://godotengine.org                         */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
10
/*                                                                        */
11
/* Permission is hereby granted, free of charge, to any person obtaining  */
12
/* a copy of this software and associated documentation files (the        */
13
/* "Software"), to deal in the Software without restriction, including    */
14
/* without limitation the rights to use, copy, modify, merge, publish,    */
15
/* distribute, sublicense, and/or sell copies of the Software, and to     */
16
/* permit persons to whom the Software is furnished to do so, subject to  */
17
/* the following conditions:                                              */
18
/*                                                                        */
19
/* The above copyright notice and this permission notice shall be         */
20
/* included in all copies or substantial portions of the Software.        */
21
/*                                                                        */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
29
/**************************************************************************/
30

31
#include "worker_thread_pool.h"
32

33
#include "core/object/script_language.h"
34
#include "core/os/os.h"
35
#include "core/os/safe_binary_mutex.h"
36
#include "core/os/thread_safe.h"
37

38
WorkerThreadPool::Task *const WorkerThreadPool::ThreadData::YIELDING = (Task *)1;
39

40
HashMap<StringName, WorkerThreadPool *> WorkerThreadPool::named_pools;
41

42
void WorkerThreadPool::Task::free_template_userdata() {
43
	ERR_FAIL_NULL(template_userdata);
44
	ERR_FAIL_NULL(native_func_userdata);
45
	BaseTemplateUserdata *btu = (BaseTemplateUserdata *)native_func_userdata;
46
	memdelete(btu);
47
}
48

49
WorkerThreadPool *WorkerThreadPool::singleton = nullptr;
50

51
#ifdef THREADS_ENABLED
52
thread_local WorkerThreadPool::UnlockableLocks WorkerThreadPool::unlockable_locks[MAX_UNLOCKABLE_LOCKS];
53
#endif
54

55
void WorkerThreadPool::_process_task(Task *p_task) {
56
#ifdef THREADS_ENABLED
57
	int pool_thread_index = thread_ids[Thread::get_caller_id()];
58
	ThreadData &curr_thread = threads[pool_thread_index];
59
	Task *prev_task = nullptr; // In case this is recursively called.
60

61
	bool safe_for_nodes_backup = is_current_thread_safe_for_nodes();
62
	CallQueue *call_queue_backup = MessageQueue::get_singleton() != MessageQueue::get_main_singleton() ? MessageQueue::get_singleton() : nullptr;
63

64
	{
65
		// Tasks must start with these at default values. They are free to set-and-forget otherwise.
66
		set_current_thread_safe_for_nodes(false);
67
		MessageQueue::set_thread_singleton_override(nullptr);
68

69
		// Since the WorkerThreadPool is started before the script server,
70
		// its pre-created threads can't have ScriptServer::thread_enter() called on them early.
71
		// Therefore, we do it late at the first opportunity, so in case the task
72
		// about to be run uses scripting, guarantees are held.
73
		ScriptServer::thread_enter();
74

75
		task_mutex.lock();
76
		p_task->pool_thread_index = pool_thread_index;
77
		prev_task = curr_thread.current_task;
78
		curr_thread.current_task = p_task;
79
		curr_thread.has_pump_task = p_task->is_pump_task;
80
		if (p_task->pending_notify_yield_over) {
81
			curr_thread.yield_is_over = true;
82
		}
83
		task_mutex.unlock();
84
	}
85
#endif
86

87
#ifdef THREADS_ENABLED
88
	bool low_priority = p_task->low_priority;
89
#endif
90

91
	if (p_task->group) {
92
		// Handling a group
93
		bool do_post = false;
94

95
		while (true) {
96
			uint32_t work_index = p_task->group->index.postincrement();
97

98
			if (work_index >= p_task->group->max) {
99
				break;
100
			}
101
			if (p_task->native_group_func) {
102
				p_task->native_group_func(p_task->native_func_userdata, work_index);
103
			} else if (p_task->template_userdata) {
104
				p_task->template_userdata->callback_indexed(work_index);
105
			} else {
106
				p_task->callable.call(work_index);
107
			}
108

109
			// This is the only way to ensure posting is done when all tasks are really complete.
110
			uint32_t completed_amount = p_task->group->completed_index.increment();
111

112
			if (completed_amount == p_task->group->max) {
113
				do_post = true;
114
			}
115
		}
116

117
		if (do_post && p_task->template_userdata) {
118
			memdelete(p_task->template_userdata); // This is no longer needed at this point, so get rid of it.
119
		}
120

121
		if (do_post) {
122
			p_task->group->done_semaphore.post();
123
			p_task->group->completed.set_to(true);
124
		}
125
		uint32_t max_users = p_task->group->tasks_used + 1; // Add 1 because the thread waiting for it is also user. Read before to avoid another thread freeing task after increment.
126
		uint32_t finished_users = p_task->group->finished.increment();
127

128
		if (finished_users == max_users) {
129
			// Get rid of the group, because nobody else is using it.
130
			MutexLock task_lock(task_mutex);
131
			group_allocator.free(p_task->group);
132
		}
133

134
		// For groups, tasks get rid of themselves.
135

136
		task_mutex.lock();
137
		task_allocator.free(p_task);
138
	} else {
139
		if (p_task->native_func) {
140
			p_task->native_func(p_task->native_func_userdata);
141
		} else if (p_task->template_userdata) {
142
			p_task->template_userdata->callback();
143
			memdelete(p_task->template_userdata);
144
		} else {
145
			p_task->callable.call();
146
		}
147

148
		task_mutex.lock();
149
		p_task->completed = true;
150
		p_task->pool_thread_index = -1;
151
		if (p_task->waiting_user) {
152
			p_task->done_semaphore.post(p_task->waiting_user);
153
		}
154
		// Let awaiters know.
155
		for (uint32_t i = 0; i < threads.size(); i++) {
156
			if (threads[i].awaited_task == p_task) {
157
				threads[i].cond_var.notify_one();
158
				threads[i].signaled = true;
159
			}
160
		}
161
	}
162

163
#ifdef THREADS_ENABLED
164
	{
165
		curr_thread.current_task = prev_task;
166
		if (low_priority) {
167
			low_priority_threads_used--;
168

169
			if (_try_promote_low_priority_task()) {
170
				if (prev_task) { // Otherwise, this thread will catch it.
171
					_notify_threads(&curr_thread, 1, 0);
172
				}
173
			}
174
		}
175

176
		task_mutex.unlock();
177
	}
178

179
	set_current_thread_safe_for_nodes(safe_for_nodes_backup);
180
	MessageQueue::set_thread_singleton_override(call_queue_backup);
181
#endif
182
}
183

184
void WorkerThreadPool::_thread_function(void *p_user) {
185
	ThreadData *thread_data = (ThreadData *)p_user;
186
	Thread::set_name(vformat("WorkerThread %d", thread_data->index));
187

188
	while (true) {
189
		Task *task_to_process = nullptr;
190
		{
191
			// Create the lock outside the inner loop so it isn't needlessly unlocked and relocked
192
			//  when no task was found to process, and the loop is re-entered.
193
			MutexLock lock(thread_data->pool->task_mutex);
194

195
			while (true) {
196
				bool exit = thread_data->pool->_handle_runlevel(thread_data, lock);
197
				if (unlikely(exit)) {
198
					return;
199
				}
200

201
				thread_data->signaled = false;
202

203
				if (!thread_data->pool->task_queue.first()) {
204
					// There wasn't a task available yet.
205
					// Let's wait for the next notification, then recheck.
206
					thread_data->cond_var.wait(lock);
207
					continue;
208
				}
209

210
				// Got a task to process! Remove it from the queue, then break into the task handling section.
211
				task_to_process = thread_data->pool->task_queue.first()->self();
212
				thread_data->pool->task_queue.remove(thread_data->pool->task_queue.first());
213
				break;
214
			}
215
		}
216

217
		DEV_ASSERT(task_to_process);
218
		thread_data->pool->_process_task(task_to_process);
219
	}
220
}
221

222
void WorkerThreadPool::_post_tasks(Task **p_tasks, uint32_t p_count, bool p_high_priority, MutexLock<BinaryMutex> &p_lock, bool p_pump_task) {
223
	// Fall back to processing on the calling thread if there are no worker threads.
224
	// Separated into its own variable to make it easier to extend this logic
225
	// in custom builds.
226

227
	// Avoid calling pump tasks or low priority tasks from the calling thread.
228
	bool process_on_calling_thread = threads.is_empty() && !p_pump_task;
229
	if (process_on_calling_thread) {
230
		p_lock.temp_unlock();
231
		for (uint32_t i = 0; i < p_count; i++) {
232
			_process_task(p_tasks[i]);
233
		}
234
		p_lock.temp_relock();
235
		return;
236
	}
237

238
	while (runlevel == RUNLEVEL_EXIT_LANGUAGES) {
239
		control_cond_var.wait(p_lock);
240
	}
241

242
	uint32_t to_process = 0;
243
	uint32_t to_promote = 0;
244

245
	ThreadData *caller_pool_thread = thread_ids.has(Thread::get_caller_id()) ? &threads[thread_ids[Thread::get_caller_id()]] : nullptr;
246

247
	for (uint32_t i = 0; i < p_count; i++) {
248
		p_tasks[i]->low_priority = !p_high_priority;
249
		if (p_high_priority || low_priority_threads_used < max_low_priority_threads) {
250
			task_queue.add_last(&p_tasks[i]->task_elem);
251
			if (!p_high_priority) {
252
				low_priority_threads_used++;
253
			}
254
			to_process++;
255
		} else {
256
			// Too many threads using low priority, must go to queue.
257
			low_priority_task_queue.add_last(&p_tasks[i]->task_elem);
258
			to_promote++;
259
		}
260
	}
261

262
	_notify_threads(caller_pool_thread, to_process, to_promote);
263
}
264

265
void WorkerThreadPool::_notify_threads(const ThreadData *p_current_thread_data, uint32_t p_process_count, uint32_t p_promote_count) {
266
	uint32_t to_process = p_process_count;
267
	uint32_t to_promote = p_promote_count;
268

269
	// This is where which threads are awaken is decided according to the workload.
270
	// Threads that will anyway have a chance to check the situation and process/promote tasks
271
	// are excluded from being notified. Others will be tried anyway to try to distribute load.
272
	// The current thread, if is a pool thread, is also excluded depending on the promoting/processing
273
	// needs because it will anyway loop again. However, it will contribute to decreasing the count,
274
	// which helps reducing sync traffic.
275

276
	uint32_t thread_count = threads.size();
277

278
	// First round:
279
	// 1. For processing: notify threads that are not running tasks, to keep the stacks as shallow as possible.
280
	// 2. For promoting: since it's exclusive with processing, we fin threads able to promote low-prio tasks now.
281
	for (uint32_t i = 0;
282
			i < thread_count && (to_process || to_promote);
283
			i++, notify_index = (notify_index + 1) % thread_count) {
284
		ThreadData &th = threads[notify_index];
285

286
		if (th.signaled) {
287
			continue;
288
		}
289
		if (th.current_task) {
290
			// Good thread for promoting low-prio?
291
			if (to_promote && th.awaited_task && th.current_task->low_priority) {
292
				if (likely(&th != p_current_thread_data)) {
293
					th.cond_var.notify_one();
294
				}
295
				th.signaled = true;
296
				to_promote--;
297
			}
298
		} else {
299
			if (to_process) {
300
				if (likely(&th != p_current_thread_data)) {
301
					th.cond_var.notify_one();
302
				}
303
				th.signaled = true;
304
				to_process--;
305
			}
306
		}
307
	}
308

309
	// Second round:
310
	// For processing: if the first round wasn't enough, let's try now with threads processing tasks but currently awaiting.
311
	for (uint32_t i = 0;
312
			i < thread_count && to_process;
313
			i++, notify_index = (notify_index + 1) % thread_count) {
314
		ThreadData &th = threads[notify_index];
315

316
		if (th.signaled) {
317
			continue;
318
		}
319
		if (th.awaited_task) {
320
			if (likely(&th != p_current_thread_data)) {
321
				th.cond_var.notify_one();
322
			}
323
			th.signaled = true;
324
			to_process--;
325
		}
326
	}
327
}
328

329
bool WorkerThreadPool::_try_promote_low_priority_task() {
330
	if (low_priority_task_queue.first()) {
331
		Task *low_prio_task = low_priority_task_queue.first()->self();
332
		low_priority_task_queue.remove(low_priority_task_queue.first());
333
		task_queue.add_last(&low_prio_task->task_elem);
334
		low_priority_threads_used++;
335
		return true;
336
	} else {
337
		return false;
338
	}
339
}
340

341
WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *), void *p_userdata, bool p_high_priority, const String &p_description) {
342
	return _add_task(Callable(), p_func, p_userdata, nullptr, p_high_priority, p_description);
343
}
344

345
WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description, bool p_pump_task) {
346
	MutexLock<BinaryMutex> lock(task_mutex);
347

348
	// Get a free task
349
	Task *task = task_allocator.alloc();
350
	TaskID id = last_task++;
351
	task->self = id;
352
	task->callable = p_callable;
353
	task->native_func = p_func;
354
	task->native_func_userdata = p_userdata;
355
	task->description = p_description;
356
	task->template_userdata = p_template_userdata;
357
	task->is_pump_task = p_pump_task;
358
	tasks.insert(id, task);
359

360
#ifdef THREADS_ENABLED
361
	if (p_pump_task) {
362
		pump_task_count++;
363
		int thread_count = get_thread_count();
364
		if (pump_task_count >= thread_count) {
365
			print_verbose(vformat("A greater number of dedicated threads were requested (%d) than threads available (%d). Please increase the number of available worker task threads. Recovering this session by spawning more worker task threads.", pump_task_count + 1, thread_count)); // +1 because we want to keep a Thread without any pump tasks free.
366

367
			Thread::Settings settings;
368
#ifdef __APPLE__
369
			// The default stack size for new threads on Apple platforms is 512KiB.
370
			// This is insufficient when using a library like SPIRV-Cross,
371
			// which can generate deep stacks and result in a stack overflow.
372
#ifdef DEV_ENABLED
373
			// Debug builds need an even larger stack size.
374
			settings.stack_size = 2 * 1024 * 1024; // 2 MiB
375
#else
376
			settings.stack_size = 1 * 1024 * 1024; // 1 MiB
377
#endif
378
#endif
379
			// Re-sizing implies relocation, which is not supported for this array.
380
			CRASH_COND_MSG(thread_count + 1 > (int)threads.get_capacity(), "Reserve trick for worker thread pool failed. Crashing.");
381
			threads.resize_initialized(thread_count + 1);
382
			threads[thread_count].index = thread_count;
383
			threads[thread_count].pool = this;
384
			threads[thread_count].thread.start(&WorkerThreadPool::_thread_function, &threads[thread_count], settings);
385
			thread_ids.insert(threads[thread_count].thread.get_id(), thread_count);
386
		}
387
	}
388
#endif
389

390
	_post_tasks(&task, 1, p_high_priority, lock, p_pump_task);
391

392
	return id;
393
}
394

395
WorkerThreadPool::TaskID WorkerThreadPool::add_task(const Callable &p_action, bool p_high_priority, const String &p_description, bool p_pump_task) {
396
	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description, p_pump_task);
397
}
398

399
WorkerThreadPool::TaskID WorkerThreadPool::add_task_bind(const Callable &p_action, bool p_high_priority, const String &p_description) {
400
	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description, false);
401
}
402

403
bool WorkerThreadPool::is_task_completed(TaskID p_task_id) const {
404
	MutexLock task_lock(task_mutex);
405
	const Task *const *taskp = tasks.getptr(p_task_id);
406
	if (!taskp) {
407
		ERR_FAIL_V_MSG(false, "Invalid Task ID"); // Invalid task
408
	}
409

410
	return (*taskp)->completed;
411
}
412

413
Error WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) {
414
	task_mutex.lock();
415
	Task **taskp = tasks.getptr(p_task_id);
416
	if (!taskp) {
417
		task_mutex.unlock();
418
		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Invalid Task ID"); // Invalid task
419
	}
420
	Task *task = *taskp;
421

422
	if (task->completed) {
423
		if (task->waiting_pool == 0 && task->waiting_user == 0) {
424
			tasks.erase(p_task_id);
425
			task_allocator.free(task);
426
		}
427
		task_mutex.unlock();
428
		return OK;
429
	}
430

431
	ThreadData *caller_pool_thread = thread_ids.has(Thread::get_caller_id()) ? &threads[thread_ids[Thread::get_caller_id()]] : nullptr;
432
	if (caller_pool_thread && p_task_id <= caller_pool_thread->current_task->self) {
433
		// Deadlock prevention:
434
		// When a pool thread wants to wait for an older task, the following situations can happen:
435
		// 1. Awaited task is deep in the stack of the awaiter.
436
		// 2. A group of awaiter threads end up depending on some tasks buried in the stack
437
		//    of their worker threads in such a way that progress can't be made.
438
		// Both would entail a deadlock. Some may be handled here in the WorkerThreadPool
439
		// with some extra logic and bookkeeping. However, there would still be unavoidable
440
		// cases of deadlock because of the way waiting threads process outstanding tasks.
441
		// Taking into account there's no feasible solution for every possible case
442
		// with the current design, we just simply reject attempts to await on older tasks,
443
		// with a specific error code that signals the situation so the caller can handle it.
444
		task_mutex.unlock();
445
		return ERR_BUSY;
446
	}
447

448
	if (caller_pool_thread) {
449
		task->waiting_pool++;
450
	} else {
451
		task->waiting_user++;
452
	}
453

454
	if (caller_pool_thread) {
455
		task_mutex.unlock();
456
		_wait_collaboratively(caller_pool_thread, task);
457
		task_mutex.lock();
458
		task->waiting_pool--;
459
		if (task->waiting_pool == 0 && task->waiting_user == 0) {
460
			tasks.erase(p_task_id);
461
			task_allocator.free(task);
462
		}
463
	} else {
464
		task_mutex.unlock();
465
		task->done_semaphore.wait();
466
		task_mutex.lock();
467
		task->waiting_user--;
468
		if (task->waiting_pool == 0 && task->waiting_user == 0) {
469
			tasks.erase(p_task_id);
470
			task_allocator.free(task);
471
		}
472
	}
473

474
	task_mutex.unlock();
475
	return OK;
476
}
477

478
void WorkerThreadPool::_lock_unlockable_mutexes() {
479
#ifdef THREADS_ENABLED
480
	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
481
		if (unlockable_locks[i].ulock) {
482
			unlockable_locks[i].ulock->lock();
483
		}
484
	}
485
#endif
486
}
487

488
void WorkerThreadPool::_unlock_unlockable_mutexes() {
489
#ifdef THREADS_ENABLED
490
	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
491
		if (unlockable_locks[i].ulock) {
492
			unlockable_locks[i].ulock->unlock();
493
		}
494
	}
495
#endif
496
}
497

498
void WorkerThreadPool::_wait_collaboratively(ThreadData *p_caller_pool_thread, Task *p_task) {
499
	// Keep processing tasks until the condition to stop waiting is met.
500

501
	while (true) {
502
		Task *task_to_process = nullptr;
503
		bool relock_unlockables = false;
504
		{
505
			MutexLock lock(task_mutex);
506

507
			bool was_signaled = p_caller_pool_thread->signaled;
508
			p_caller_pool_thread->signaled = false;
509

510
			bool exit = _handle_runlevel(p_caller_pool_thread, lock);
511
			if (unlikely(exit)) {
512
				break;
513
			}
514

515
			bool wait_is_over = false;
516
			if (unlikely(p_task == ThreadData::YIELDING)) {
517
				if (p_caller_pool_thread->yield_is_over) {
518
					p_caller_pool_thread->yield_is_over = false;
519
					wait_is_over = true;
520
				}
521
			} else {
522
				if (p_task->completed) {
523
					wait_is_over = true;
524
				}
525
			}
526

527
			if (wait_is_over) {
528
				if (was_signaled) {
529
					// This thread was awaken for some additional reason, but it's about to exit.
530
					// Let's find out what may be pending and forward the requests.
531
					uint32_t to_process = task_queue.first() ? 1 : 0;
532
					uint32_t to_promote = p_caller_pool_thread->current_task->low_priority && low_priority_task_queue.first() ? 1 : 0;
533
					if (to_process || to_promote) {
534
						// This thread must be left alone since it won't loop again.
535
						p_caller_pool_thread->signaled = true;
536
						_notify_threads(p_caller_pool_thread, to_process, to_promote);
537
					}
538
				}
539

540
				break;
541
			}
542

543
			if (p_caller_pool_thread->current_task->low_priority && low_priority_task_queue.first()) {
544
				if (_try_promote_low_priority_task()) {
545
					_notify_threads(p_caller_pool_thread, 1, 0);
546
				}
547
			}
548

549
			if (p_caller_pool_thread->pool->task_queue.first()) {
550
				task_to_process = task_queue.first()->self();
551
				if ((p_task == ThreadData::YIELDING || p_caller_pool_thread->has_pump_task == true) && task_to_process->is_pump_task) {
552
					task_to_process = nullptr;
553
					_notify_threads(p_caller_pool_thread, 1, 0);
554
				} else {
555
					task_queue.remove(task_queue.first());
556
				}
557
			}
558

559
			if (!task_to_process) {
560
				p_caller_pool_thread->awaited_task = p_task;
561

562
				if (this == singleton) {
563
					_unlock_unlockable_mutexes();
564
				}
565
				relock_unlockables = true;
566

567
				p_caller_pool_thread->cond_var.wait(lock);
568

569
				p_caller_pool_thread->awaited_task = nullptr;
570
			}
571
		}
572

573
		if (relock_unlockables && this == singleton) {
574
			_lock_unlockable_mutexes();
575
		}
576

577
		if (task_to_process) {
578
			_process_task(task_to_process);
579
		}
580
	}
581
}
582

583
void WorkerThreadPool::_switch_runlevel(Runlevel p_runlevel) {
584
	DEV_ASSERT(p_runlevel > runlevel);
585
	runlevel = p_runlevel;
586
	memset(&runlevel_data, 0, sizeof(runlevel_data));
587
	for (uint32_t i = 0; i < threads.size(); i++) {
588
		threads[i].cond_var.notify_one();
589
		threads[i].signaled = true;
590
	}
591
	control_cond_var.notify_all();
592
}
593

594
// Returns whether threads have to exit. This may perform the check about handling needed.
595
bool WorkerThreadPool::_handle_runlevel(ThreadData *p_thread_data, MutexLock<BinaryMutex> &p_lock) {
596
	bool exit = false;
597
	switch (runlevel) {
598
		case RUNLEVEL_NORMAL: {
599
		} break;
600
		case RUNLEVEL_PRE_EXIT_LANGUAGES: {
601
			if (!p_thread_data->pre_exited_languages) {
602
				if (!task_queue.first() && !low_priority_task_queue.first()) {
603
					p_thread_data->pre_exited_languages = true;
604
					runlevel_data.pre_exit_languages.num_idle_threads++;
605
					control_cond_var.notify_all();
606
				}
607
			}
608
		} break;
609
		case RUNLEVEL_EXIT_LANGUAGES: {
610
			if (!p_thread_data->exited_languages) {
611
				p_lock.temp_unlock();
612
				ScriptServer::thread_exit();
613
				p_lock.temp_relock();
614
				p_thread_data->exited_languages = true;
615
				runlevel_data.exit_languages.num_exited_threads++;
616
				control_cond_var.notify_all();
617
			}
618
		} break;
619
		case RUNLEVEL_EXIT: {
620
			exit = true;
621
		} break;
622
	}
623
	return exit;
624
}
625

626
void WorkerThreadPool::yield() {
627
	int th_index = get_thread_index();
628
	ERR_FAIL_COND_MSG(th_index == -1, "This function can only be called from a worker thread.");
629
	_wait_collaboratively(&threads[th_index], ThreadData::YIELDING);
630

631
	task_mutex.lock();
632
	if (runlevel < RUNLEVEL_EXIT_LANGUAGES) {
633
		// If this long-lived task started before the scripting server was initialized,
634
		// now is a good time to have scripting languages ready for the current thread.
635
		// Otherwise, such a piece of setup won't happen unless another task has been
636
		// run during the collaborative wait.
637
		task_mutex.unlock();
638
		ScriptServer::thread_enter();
639
	} else {
640
		task_mutex.unlock();
641
	}
642
}
643

644
void WorkerThreadPool::notify_yield_over(TaskID p_task_id) {
645
	MutexLock task_lock(task_mutex);
646
	Task **taskp = tasks.getptr(p_task_id);
647
	if (!taskp) {
648
		ERR_FAIL_MSG("Invalid Task ID.");
649
	}
650
	Task *task = *taskp;
651
	if (task->pool_thread_index == -1) { // Completed or not started yet.
652
		if (!task->completed) {
653
			// This avoids a race condition where a task is created and yield-over called before it's processed.
654
			task->pending_notify_yield_over = true;
655
		}
656
		return;
657
	}
658

659
	ThreadData &td = threads[task->pool_thread_index];
660
	td.yield_is_over = true;
661
	td.signaled = true;
662
	td.cond_var.notify_one();
663
}
664

665
WorkerThreadPool::GroupID WorkerThreadPool::_add_group_task(const Callable &p_callable, void (*p_func)(void *, uint32_t), void *p_userdata, BaseTemplateUserdata *p_template_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
666
	ERR_FAIL_COND_V(p_elements < 0, INVALID_TASK_ID);
667
	if (p_tasks < 0) {
668
		p_tasks = MAX(1u, threads.size());
669
	}
670

671
	MutexLock<BinaryMutex> lock(task_mutex);
672

673
	Group *group = group_allocator.alloc();
674
	GroupID id = last_task++;
675
	group->max = p_elements;
676
	group->self = id;
677

678
	Task **tasks_posted = nullptr;
679
	if (p_elements == 0) {
680
		// Should really not call it with zero Elements, but at least it should work.
681
		group->completed.set_to(true);
682
		group->done_semaphore.post();
683
		group->tasks_used = 0;
684
		p_tasks = 0;
685
		if (p_template_userdata) {
686
			memdelete(p_template_userdata);
687
		}
688

689
	} else {
690
		group->tasks_used = p_tasks;
691
		tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
692
		for (int i = 0; i < p_tasks; i++) {
693
			Task *task = task_allocator.alloc();
694
			task->native_group_func = p_func;
695
			task->native_func_userdata = p_userdata;
696
			task->description = p_description;
697
			task->group = group;
698
			task->callable = p_callable;
699
			task->template_userdata = p_template_userdata;
700
			tasks_posted[i] = task;
701
			// No task ID is used.
702
		}
703
	}
704

705
	groups[id] = group;
706

707
	_post_tasks(tasks_posted, p_tasks, p_high_priority, lock, false);
708

709
	return id;
710
}
711

712
WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
713
	return _add_group_task(Callable(), p_func, p_userdata, nullptr, p_elements, p_tasks, p_high_priority, p_description);
714
}
715

716
WorkerThreadPool::GroupID WorkerThreadPool::add_group_task(const Callable &p_action, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
717
	return _add_group_task(p_action, nullptr, nullptr, nullptr, p_elements, p_tasks, p_high_priority, p_description);
718
}
719

720
uint32_t WorkerThreadPool::get_group_processed_element_count(GroupID p_group) const {
721
	MutexLock task_lock(task_mutex);
722
	const Group *const *groupp = groups.getptr(p_group);
723
	if (!groupp) {
724
		ERR_FAIL_V_MSG(0, "Invalid Group ID");
725
	}
726
	return (*groupp)->completed_index.get();
727
}
728
bool WorkerThreadPool::is_group_task_completed(GroupID p_group) const {
729
	MutexLock task_lock(task_mutex);
730
	const Group *const *groupp = groups.getptr(p_group);
731
	if (!groupp) {
732
		ERR_FAIL_V_MSG(false, "Invalid Group ID");
733
	}
734
	return (*groupp)->completed.is_set();
735
}
736

737
void WorkerThreadPool::wait_for_group_task_completion(GroupID p_group) {
738
#ifdef THREADS_ENABLED
739
	task_mutex.lock();
740
	Group **groupp = groups.getptr(p_group);
741
	task_mutex.unlock();
742
	if (!groupp) {
743
		ERR_FAIL_MSG("Invalid Group ID.");
744
	}
745

746
	{
747
		Group *group = *groupp;
748

749
		if (this == singleton) {
750
			_unlock_unlockable_mutexes();
751
		}
752
		group->done_semaphore.wait();
753
		if (this == singleton) {
754
			_lock_unlockable_mutexes();
755
		}
756

757
		uint32_t max_users = group->tasks_used + 1; // Add 1 because the thread waiting for it is also user. Read before to avoid another thread freeing task after increment.
758
		uint32_t finished_users = group->finished.increment(); // fetch happens before inc, so increment later.
759

760
		if (finished_users == max_users) {
761
			// All tasks using this group are gone (finished before the group), so clear the group too.
762
			MutexLock task_lock(task_mutex);
763
			group_allocator.free(group);
764
		}
765
	}
766

767
	MutexLock task_lock(task_mutex); // This mutex is needed when Physics 2D and/or 3D is selected to run on a separate thread.
768
	groups.erase(p_group);
769
#endif
770
}
771

772
int WorkerThreadPool::get_thread_index() const {
773
	Thread::ID tid = Thread::get_caller_id();
774
	return thread_ids.has(tid) ? thread_ids[tid] : -1;
775
}
776

777
WorkerThreadPool::TaskID WorkerThreadPool::get_caller_task_id() const {
778
	int th_index = get_thread_index();
779
	if (th_index != -1 && threads[th_index].current_task) {
780
		return threads[th_index].current_task->self;
781
	} else {
782
		return INVALID_TASK_ID;
783
	}
784
}
785

786
WorkerThreadPool::GroupID WorkerThreadPool::get_caller_group_id() const {
787
	int th_index = get_thread_index();
788
	if (th_index != -1 && threads[th_index].current_task && threads[th_index].current_task->group) {
789
		return threads[th_index].current_task->group->self;
790
	} else {
791
		return INVALID_TASK_ID;
792
	}
793
}
794

795
#ifdef THREADS_ENABLED
796
uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &p_ulock) {
797
	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
798
		DEV_ASSERT((bool)unlockable_locks[i].ulock == (bool)unlockable_locks[i].rc);
799
		if (unlockable_locks[i].ulock == &p_ulock) {
800
			// Already registered in the current thread.
801
			unlockable_locks[i].rc++;
802
			return i;
803
		} else if (!unlockable_locks[i].ulock) {
804
			unlockable_locks[i].ulock = &p_ulock;
805
			unlockable_locks[i].rc = 1;
806
			return i;
807
		}
808
	}
809
	ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable lock slots available. Engine bug.");
810
}
811

812
void WorkerThreadPool::thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {
813
	DEV_ASSERT(unlockable_locks[p_zone_id].ulock && unlockable_locks[p_zone_id].rc);
814
	unlockable_locks[p_zone_id].rc--;
815
	if (unlockable_locks[p_zone_id].rc == 0) {
816
		unlockable_locks[p_zone_id].ulock = nullptr;
817
	}
818
}
819
#endif
820

821
void WorkerThreadPool::init(int p_thread_count, float p_low_priority_task_ratio) {
822
	ERR_FAIL_COND(threads.size() > 0);
823

824
	runlevel = RUNLEVEL_NORMAL;
825

826
	if (p_thread_count < 0) {
827
		p_thread_count = OS::get_singleton()->get_default_thread_pool_size();
828
	}
829

830
	max_low_priority_threads = CLAMP(p_thread_count * p_low_priority_task_ratio, 1, p_thread_count - 1);
831

832
	print_verbose(vformat("WorkerThreadPool: %d threads, %d max low-priority.", p_thread_count, max_low_priority_threads));
833

834
#ifdef THREADS_ENABLED
835
	// Reserve 5 threads in case we need separate threads for 1) 2D physics 2) 3D physics 3) rendering 4) GPU texture compression, 5) all other tasks.
836
	// We cannot safely increase the Vector size at runtime, so reserve enough up front, but only launch those needed.
837
	threads.reserve(5);
838
#endif
839
	threads.resize(p_thread_count);
840

841
	Thread::Settings settings;
842
#ifdef __APPLE__
843
	// The default stack size for new threads on Apple platforms is 512KiB.
844
	// This is insufficient when using a library like SPIRV-Cross,
845
	// which can generate deep stacks and result in a stack overflow.
846
#ifdef DEV_ENABLED
847
	// Debug builds need an even larger stack size.
848
	settings.stack_size = 2 * 1024 * 1024; // 2 MiB
849
#else
850
	settings.stack_size = 1 * 1024 * 1024; // 1 MiB
851
#endif
852
#endif
853

854
	for (uint32_t i = 0; i < threads.size(); i++) {
855
		threads[i].index = i;
856
		threads[i].pool = this;
857
		threads[i].thread.start(&WorkerThreadPool::_thread_function, &threads[i], settings);
858
		thread_ids.insert(threads[i].thread.get_id(), i);
859
	}
860
}
861

862
void WorkerThreadPool::exit_languages_threads() {
863
	if (threads.is_empty()) {
864
		return;
865
	}
866

867
	MutexLock lock(task_mutex);
868

869
	// Wait until all threads are idle.
870
	_switch_runlevel(RUNLEVEL_PRE_EXIT_LANGUAGES);
871
	while (runlevel_data.pre_exit_languages.num_idle_threads != threads.size()) {
872
		control_cond_var.wait(lock);
873
	}
874

875
	// Wait until all threads have detached from scripting languages.
876
	_switch_runlevel(RUNLEVEL_EXIT_LANGUAGES);
877
	while (runlevel_data.exit_languages.num_exited_threads != threads.size()) {
878
		control_cond_var.wait(lock);
879
	}
880
}
881

882
void WorkerThreadPool::finish() {
883
	if (threads.is_empty()) {
884
		return;
885
	}
886

887
	{
888
		MutexLock lock(task_mutex);
889
		SelfList<Task> *E = low_priority_task_queue.first();
890
		while (E) {
891
			print_error("Task waiting was never re-claimed: " + E->self()->description);
892
			E = E->next();
893
		}
894

895
		_switch_runlevel(RUNLEVEL_EXIT);
896
	}
897

898
	for (ThreadData &data : threads) {
899
		data.thread.wait_to_finish();
900
	}
901

902
	{
903
		MutexLock lock(task_mutex);
904
		for (KeyValue<TaskID, Task *> &E : tasks) {
905
			task_allocator.free(E.value);
906
		}
907
	}
908

909
	threads.clear();
910
}
911

912
void WorkerThreadPool::_bind_methods() {
913
	ClassDB::bind_method(D_METHOD("add_task", "action", "high_priority", "description"), &WorkerThreadPool::add_task_bind, DEFVAL(false), DEFVAL(String()));
914
	ClassDB::bind_method(D_METHOD("is_task_completed", "task_id"), &WorkerThreadPool::is_task_completed);
915
	ClassDB::bind_method(D_METHOD("wait_for_task_completion", "task_id"), &WorkerThreadPool::wait_for_task_completion);
916
	ClassDB::bind_method(D_METHOD("get_caller_task_id"), &WorkerThreadPool::get_caller_task_id);
917

918
	ClassDB::bind_method(D_METHOD("add_group_task", "action", "elements", "tasks_needed", "high_priority", "description"), &WorkerThreadPool::add_group_task, DEFVAL(-1), DEFVAL(false), DEFVAL(String()));
919
	ClassDB::bind_method(D_METHOD("is_group_task_completed", "group_id"), &WorkerThreadPool::is_group_task_completed);
920
	ClassDB::bind_method(D_METHOD("get_group_processed_element_count", "group_id"), &WorkerThreadPool::get_group_processed_element_count);
921
	ClassDB::bind_method(D_METHOD("wait_for_group_task_completion", "group_id"), &WorkerThreadPool::wait_for_group_task_completion);
922
	ClassDB::bind_method(D_METHOD("get_caller_group_id"), &WorkerThreadPool::get_caller_group_id);
923
}
924

925
WorkerThreadPool *WorkerThreadPool::get_named_pool(const StringName &p_name) {
926
	WorkerThreadPool **pool_ptr = named_pools.getptr(p_name);
927
	if (pool_ptr) {
928
		return *pool_ptr;
929
	} else {
930
		WorkerThreadPool *pool = memnew(WorkerThreadPool(false));
931
		pool->init();
932
		named_pools[p_name] = pool;
933
		return pool;
934
	}
935
}
936

937
WorkerThreadPool::WorkerThreadPool(bool p_singleton) {
938
	if (p_singleton) {
939
		singleton = this;
940
	}
941
}
942

943
WorkerThreadPool::~WorkerThreadPool() {
944
	finish();
945

946
	if (this == singleton) {
947
		singleton = nullptr;
948
		for (KeyValue<StringName, WorkerThreadPool *> &E : named_pools) {
949
			E.value->finish();
950
			memdelete(E.value);
951
		}
952
		named_pools.clear();
953
	}
954
}
955

956
Product

Resources

Company