GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
1
/*
2
* Copyright 2008 Jerome Glisse.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
* DEALINGS IN THE SOFTWARE.
23
*
24
* Authors:
25
* Jerome Glisse <[email protected]>
26
*/
27
28
#include <linux/file.h>
29
#include <linux/pagemap.h>
30
#include <linux/sync_file.h>
31
#include <linux/dma-buf.h>
32
#include <linux/hmm.h>
33
34
#include <drm/amdgpu_drm.h>
35
#include <drm/drm_syncobj.h>
36
#include <drm/ttm/ttm_tt.h>
37
38
#include "amdgpu_cs.h"
39
#include "amdgpu.h"
40
#include "amdgpu_trace.h"
41
#include "amdgpu_gmc.h"
42
#include "amdgpu_gem.h"
43
#include "amdgpu_ras.h"
44
45
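/* Set up the parser: take a reference on the submission context, reject
 * submissions on contexts already marked guilty, and initialize the sync
 * object and drm_exec state used to lock BOs later on.
 */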
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
46
struct amdgpu_device *adev,
47
struct drm_file *filp,
48
union drm_amdgpu_cs *cs)
49
{
50
struct amdgpu_fpriv *fpriv = filp->driver_priv;
51
52
if (cs->in.num_chunks == 0)
53
return -EINVAL;
54
55
memset(p, 0, sizeof(*p));
56
p->adev = adev;
57
p->filp = filp;
58
59
p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
60
if (!p->ctx)
61
return -EINVAL;
62
63
if (atomic_read(&p->ctx->guilty)) {
64
amdgpu_ctx_put(p->ctx);
65
return -ECANCELED;
66
}
67
68
amdgpu_sync_create(&p->sync);
69
drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
70
DRM_EXEC_IGNORE_DUPLICATES, 0);
71
return 0;
72
}
73
74
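/* Map an IB chunk to a gang member: return the index of an existing job that
 * uses the same scheduler entity, or grow the gang by one if there is room.
 */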
static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
75
struct drm_amdgpu_cs_chunk_ib *chunk_ib)
76
{
77
struct drm_sched_entity *entity;
78
unsigned int i;
79
int r;
80
81
r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
82
chunk_ib->ip_instance,
83
chunk_ib->ring, &entity);
84
if (r)
85
return r;
86
87
/*
88
* Abort if there is no run queue associated with this entity.
89
* Possibly because of disabled HW IP.
90
*/
91
if (entity->rq == NULL)
92
return -EINVAL;
93
94
/* Check if we can add this IB to some existing job */
95
for (i = 0; i < p->gang_size; ++i)
96
if (p->entities[i] == entity)
97
return i;
98
99
/* If not, increase the gang size if possible */
100
if (i == AMDGPU_CS_GANG_SIZE)
101
return -EINVAL;
102
103
p->entities[i] = entity;
104
p->gang_size = i + 1;
105
return i;
106
}
107
108
static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
109
struct drm_amdgpu_cs_chunk_ib *chunk_ib,
110
unsigned int *num_ibs)
111
{
112
int r;
113
114
r = amdgpu_cs_job_idx(p, chunk_ib);
115
if (r < 0)
116
return r;
117
118
if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
119
return -EINVAL;
120
121
++(num_ibs[r]);
122
p->gang_leader_idx = r;
123
return 0;
124
}
125
126
static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
127
struct drm_amdgpu_cs_chunk_fence *data,
128
uint32_t *offset)
129
{
130
struct drm_gem_object *gobj;
131
unsigned long size;
132
133
gobj = drm_gem_object_lookup(p->filp, data->handle);
134
if (gobj == NULL)
135
return -EINVAL;
136
137
p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
138
drm_gem_object_put(gobj);
139
140
size = amdgpu_bo_size(p->uf_bo);
141
if (size != PAGE_SIZE || data->offset > (size - 8))
142
return -EINVAL;
143
144
if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
145
return -EINVAL;
146
147
*offset = data->offset;
148
return 0;
149
}
150
151
static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
152
struct drm_amdgpu_bo_list_in *data)
153
{
154
struct drm_amdgpu_bo_list_entry *info;
155
int r;
156
157
r = amdgpu_bo_create_list_entry_array(data, &info);
158
if (r)
159
return r;
160
161
r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
162
&p->bo_list);
163
if (r)
164
goto error_free;
165
166
kvfree(info);
167
return 0;
168
169
error_free:
170
kvfree(info);
171
172
return r;
173
}
174
175
/* Copy the data from userspace and go over it the first time */
176
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
177
union drm_amdgpu_cs *cs)
178
{
179
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
180
unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
181
struct amdgpu_vm *vm = &fpriv->vm;
182
uint64_t *chunk_array;
183
uint32_t uf_offset = 0;
184
size_t size;
185
int ret;
186
int i;
187
188
chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
189
cs->in.num_chunks,
190
sizeof(uint64_t));
191
if (IS_ERR(chunk_array))
192
return PTR_ERR(chunk_array);
193
194
p->nchunks = cs->in.num_chunks;
195
p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
196
GFP_KERNEL);
197
if (!p->chunks) {
198
ret = -ENOMEM;
199
goto free_chunk;
200
}
201
202
for (i = 0; i < p->nchunks; i++) {
203
struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
204
struct drm_amdgpu_cs_chunk user_chunk;
205
206
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
207
if (copy_from_user(&user_chunk, chunk_ptr,
208
sizeof(struct drm_amdgpu_cs_chunk))) {
209
ret = -EFAULT;
210
i--;
211
goto free_partial_kdata;
212
}
213
p->chunks[i].chunk_id = user_chunk.chunk_id;
214
p->chunks[i].length_dw = user_chunk.length_dw;
215
216
size = p->chunks[i].length_dw;
217
218
p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
219
size,
220
sizeof(uint32_t));
221
if (IS_ERR(p->chunks[i].kdata)) {
222
ret = PTR_ERR(p->chunks[i].kdata);
223
i--;
224
goto free_partial_kdata;
225
}
226
size *= sizeof(uint32_t);
227
228
/* Assume the worst on the following checks */
229
ret = -EINVAL;
230
switch (p->chunks[i].chunk_id) {
231
case AMDGPU_CHUNK_ID_IB:
232
if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
233
goto free_partial_kdata;
234
235
ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
236
if (ret)
237
goto free_partial_kdata;
238
break;
239
240
case AMDGPU_CHUNK_ID_FENCE:
241
if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
242
goto free_partial_kdata;
243
244
ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
245
&uf_offset);
246
if (ret)
247
goto free_partial_kdata;
248
break;
249
250
case AMDGPU_CHUNK_ID_BO_HANDLES:
251
if (size < sizeof(struct drm_amdgpu_bo_list_in))
252
goto free_partial_kdata;
253
254
/* Only a single BO list is allowed to simplify handling. */
255
if (p->bo_list)
256
goto free_partial_kdata;
257
258
ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
259
if (ret)
260
goto free_partial_kdata;
261
break;
262
263
case AMDGPU_CHUNK_ID_DEPENDENCIES:
264
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
265
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
266
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
267
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
268
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
269
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
270
break;
271
272
default:
273
goto free_partial_kdata;
274
}
275
}
276
277
if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
278
ret = -EINVAL;
279
goto free_all_kdata;
280
}
281
282
for (i = 0; i < p->gang_size; ++i) {
283
ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
284
num_ibs[i], &p->jobs[i],
285
p->filp->client_id);
286
if (ret)
287
goto free_all_kdata;
288
switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
289
case AMDGPU_ENFORCE_ISOLATION_DISABLE:
290
default:
291
p->jobs[i]->enforce_isolation = false;
292
p->jobs[i]->run_cleaner_shader = false;
293
break;
294
case AMDGPU_ENFORCE_ISOLATION_ENABLE:
295
p->jobs[i]->enforce_isolation = true;
296
p->jobs[i]->run_cleaner_shader = true;
297
break;
298
case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
299
p->jobs[i]->enforce_isolation = true;
300
p->jobs[i]->run_cleaner_shader = false;
301
break;
302
case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
303
p->jobs[i]->enforce_isolation = true;
304
p->jobs[i]->run_cleaner_shader = false;
305
break;
306
}
307
}
308
p->gang_leader = p->jobs[p->gang_leader_idx];
309
310
if (p->ctx->generation != p->gang_leader->generation) {
311
ret = -ECANCELED;
312
goto free_all_kdata;
313
}
314
315
if (p->uf_bo)
316
p->gang_leader->uf_addr = uf_offset;
317
kvfree(chunk_array);
318
319
/* Use this opportunity to fill in task info for the vm */
320
amdgpu_vm_set_task_info(vm);
321
322
return 0;
323
324
free_all_kdata:
325
i = p->nchunks - 1;
326
free_partial_kdata:
327
for (; i >= 0; i--)
328
kvfree(p->chunks[i].kdata);
329
kvfree(p->chunks);
330
p->chunks = NULL;
331
p->nchunks = 0;
332
free_chunk:
333
kvfree(chunk_array);
334
335
return ret;
336
}
337
338
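/* Second-pass handling of an IB chunk: allocate the amdgpu_ib inside the
 * right job and enforce the per-submission CE/DE preemption limits.
 */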
static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
339
struct amdgpu_cs_chunk *chunk,
340
unsigned int *ce_preempt,
341
unsigned int *de_preempt)
342
{
343
struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
344
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
345
struct amdgpu_vm *vm = &fpriv->vm;
346
struct amdgpu_ring *ring;
347
struct amdgpu_job *job;
348
struct amdgpu_ib *ib;
349
int r;
350
351
r = amdgpu_cs_job_idx(p, chunk_ib);
352
if (r < 0)
353
return r;
354
355
job = p->jobs[r];
356
ring = amdgpu_job_ring(job);
357
ib = &job->ibs[job->num_ibs++];
358
359
/* submissions to kernel queues are disabled */
360
if (ring->no_user_submission)
361
return -EINVAL;
362
363
/* MM engine doesn't support user fences */
364
if (p->uf_bo && ring->funcs->no_user_fence)
365
return -EINVAL;
366
367
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
368
chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
369
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
370
(*ce_preempt)++;
371
else
372
(*de_preempt)++;
373
374
/* Each GFX command submit allows only 1 IB max
375
* preemptible for CE & DE */
376
if (*ce_preempt > 1 || *de_preempt > 1)
377
return -EINVAL;
378
}
379
380
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
381
job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
382
383
r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
384
chunk_ib->ib_bytes : 0,
385
AMDGPU_IB_POOL_DELAYED, ib);
386
if (r) {
387
drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");
388
return r;
389
}
390
391
ib->gpu_addr = chunk_ib->va_start;
392
ib->length_dw = chunk_ib->ib_bytes / 4;
393
ib->flags = chunk_ib->flags;
394
return 0;
395
}
396
397
static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
398
struct amdgpu_cs_chunk *chunk)
399
{
400
struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
401
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
402
unsigned int num_deps;
403
int i, r;
404
405
num_deps = chunk->length_dw * 4 /
406
sizeof(struct drm_amdgpu_cs_chunk_dep);
407
408
for (i = 0; i < num_deps; ++i) {
409
struct amdgpu_ctx *ctx;
410
struct drm_sched_entity *entity;
411
struct dma_fence *fence;
412
413
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
414
if (ctx == NULL)
415
return -EINVAL;
416
417
r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
418
deps[i].ip_instance,
419
deps[i].ring, &entity);
420
if (r) {
421
amdgpu_ctx_put(ctx);
422
return r;
423
}
424
425
fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
426
amdgpu_ctx_put(ctx);
427
428
if (IS_ERR(fence))
429
return PTR_ERR(fence);
430
else if (!fence)
431
continue;
432
433
if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
434
struct drm_sched_fence *s_fence;
435
struct dma_fence *old = fence;
436
437
s_fence = to_drm_sched_fence(fence);
438
fence = dma_fence_get(&s_fence->scheduled);
439
dma_fence_put(old);
440
}
441
442
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
443
dma_fence_put(fence);
444
if (r)
445
return r;
446
}
447
return 0;
448
}
449
450
static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
451
uint32_t handle, u64 point,
452
u64 flags)
453
{
454
struct dma_fence *fence;
455
int r;
456
457
r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
458
if (r) {
459
drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
460
handle, point, r);
461
return r;
462
}
463
464
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
465
dma_fence_put(fence);
466
return r;
467
}
468
469
static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
470
struct amdgpu_cs_chunk *chunk)
471
{
472
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
473
unsigned int num_deps;
474
int i, r;
475
476
num_deps = chunk->length_dw * 4 /
477
sizeof(struct drm_amdgpu_cs_chunk_sem);
478
for (i = 0; i < num_deps; ++i) {
479
r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
480
if (r)
481
return r;
482
}
483
484
return 0;
485
}
486
487
static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
488
struct amdgpu_cs_chunk *chunk)
489
{
490
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
491
unsigned int num_deps;
492
int i, r;
493
494
num_deps = chunk->length_dw * 4 /
495
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
496
for (i = 0; i < num_deps; ++i) {
497
r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
498
syncobj_deps[i].point,
499
syncobj_deps[i].flags);
500
if (r)
501
return r;
502
}
503
504
return 0;
505
}
506
507
static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
508
struct amdgpu_cs_chunk *chunk)
509
{
510
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
511
unsigned int num_deps;
512
int i;
513
514
num_deps = chunk->length_dw * 4 /
515
sizeof(struct drm_amdgpu_cs_chunk_sem);
516
517
if (p->post_deps)
518
return -EINVAL;
519
520
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
521
GFP_KERNEL);
522
p->num_post_deps = 0;
523
524
if (!p->post_deps)
525
return -ENOMEM;
526
527
528
for (i = 0; i < num_deps; ++i) {
529
p->post_deps[i].syncobj =
530
drm_syncobj_find(p->filp, deps[i].handle);
531
if (!p->post_deps[i].syncobj)
532
return -EINVAL;
533
p->post_deps[i].chain = NULL;
534
p->post_deps[i].point = 0;
535
p->num_post_deps++;
536
}
537
538
return 0;
539
}
540
541
static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
542
struct amdgpu_cs_chunk *chunk)
543
{
544
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
545
unsigned int num_deps;
546
int i;
547
548
num_deps = chunk->length_dw * 4 /
549
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
550
551
if (p->post_deps)
552
return -EINVAL;
553
554
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
555
GFP_KERNEL);
556
p->num_post_deps = 0;
557
558
if (!p->post_deps)
559
return -ENOMEM;
560
561
for (i = 0; i < num_deps; ++i) {
562
struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
563
564
dep->chain = NULL;
565
if (syncobj_deps[i].point) {
566
dep->chain = dma_fence_chain_alloc();
567
if (!dep->chain)
568
return -ENOMEM;
569
}
570
571
dep->syncobj = drm_syncobj_find(p->filp,
572
syncobj_deps[i].handle);
573
if (!dep->syncobj) {
574
dma_fence_chain_free(dep->chain);
575
return -EINVAL;
576
}
577
dep->point = syncobj_deps[i].point;
578
p->num_post_deps++;
579
}
580
581
return 0;
582
}
583
584
static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
585
struct amdgpu_cs_chunk *chunk)
586
{
587
struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
588
int i;
589
590
if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
591
return -EINVAL;
592
593
for (i = 0; i < p->gang_size; ++i) {
594
p->jobs[i]->shadow_va = shadow->shadow_va;
595
p->jobs[i]->csa_va = shadow->csa_va;
596
p->jobs[i]->gds_va = shadow->gds_va;
597
p->jobs[i]->init_shadow =
598
shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
599
}
600
601
return 0;
602
}
603
604
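/* Second pass over the copied chunks: create the IBs and collect all
 * dependencies, syncobj waits/signals and CP shadow state for the jobs.
 */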
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
605
{
606
unsigned int ce_preempt = 0, de_preempt = 0;
607
int i, r;
608
609
for (i = 0; i < p->nchunks; ++i) {
610
struct amdgpu_cs_chunk *chunk;
611
612
chunk = &p->chunks[i];
613
614
switch (chunk->chunk_id) {
615
case AMDGPU_CHUNK_ID_IB:
616
r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
617
if (r)
618
return r;
619
break;
620
case AMDGPU_CHUNK_ID_DEPENDENCIES:
621
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
622
r = amdgpu_cs_p2_dependencies(p, chunk);
623
if (r)
624
return r;
625
break;
626
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
627
r = amdgpu_cs_p2_syncobj_in(p, chunk);
628
if (r)
629
return r;
630
break;
631
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
632
r = amdgpu_cs_p2_syncobj_out(p, chunk);
633
if (r)
634
return r;
635
break;
636
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
637
r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
638
if (r)
639
return r;
640
break;
641
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
642
r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
643
if (r)
644
return r;
645
break;
646
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
647
r = amdgpu_cs_p2_shadow(p, chunk);
648
if (r)
649
return r;
650
break;
651
}
652
}
653
654
return 0;
655
}
656
657
/* Convert microseconds to bytes. */
658
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
659
{
660
if (us <= 0 || !adev->mm_stats.log2_max_MBps)
661
return 0;
662
663
/* Since accum_us is incremented by a million per second, just
664
* multiply it by the number of MB/s to get the number of bytes.
665
*/
666
return us << adev->mm_stats.log2_max_MBps;
667
}
668
669
static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
670
{
671
if (!adev->mm_stats.log2_max_MBps)
672
return 0;
673
674
return bytes >> adev->mm_stats.log2_max_MBps;
675
}
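/* Worked example (values assumed for illustration): with log2_max_MBps = 6,
 * i.e. 2^6 = 64 MB/s, one second of accumulated time (1000000 us) converts to
 * 1000000 << 6 = 64000000 bytes of allowed buffer movement; bytes_to_us() is
 * the inverse used to repay that budget after a move.
 */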
676
677
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
678
* it returns 0. If it returns non-zero, it's OK to move at least one buffer,
679
* which means it can go over the threshold once. If that happens, the driver
680
* will be in debt and no other buffer migrations can be done until that debt
681
* is repaid.
682
*
683
* This approach allows moving a buffer of any size (it's important to allow
684
* that).
685
*
686
* The currency is simply time in microseconds and it increases as the clock
687
* ticks. The accumulated microseconds (us) are converted to bytes and
688
* returned.
689
*/
690
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
691
u64 *max_bytes,
692
u64 *max_vis_bytes)
693
{
694
s64 time_us, increment_us;
695
u64 free_vram, total_vram, used_vram;
696
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
697
* throttling.
698
*
699
* It means that in order to get full max MBps, at least 5 IBs per
700
* second must be submitted and not more than 200ms apart from each
701
* other.
702
*/
703
const s64 us_upper_bound = 200000;
704
705
if (!adev->mm_stats.log2_max_MBps) {
706
*max_bytes = 0;
707
*max_vis_bytes = 0;
708
return;
709
}
710
711
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
712
used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
713
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
714
715
spin_lock(&adev->mm_stats.lock);
716
717
/* Increase the amount of accumulated us. */
718
time_us = ktime_to_us(ktime_get());
719
increment_us = time_us - adev->mm_stats.last_update_us;
720
adev->mm_stats.last_update_us = time_us;
721
adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
722
us_upper_bound);
723
724
/* This prevents the short period of low performance when the VRAM
725
* usage is low and the driver is in debt or doesn't have enough
726
* accumulated us to fill VRAM quickly.
727
*
728
* The situation can occur in these cases:
729
* - a lot of VRAM is freed by userspace
730
* - the presence of a big buffer causes a lot of evictions
731
* (solution: split buffers into smaller ones)
732
*
733
* If 128 MB or 1/8th of VRAM is free, start filling it now by setting
734
* accum_us to a positive number.
735
*/
736
if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
737
s64 min_us;
738
739
/* Be more aggressive on dGPUs. Try to fill a portion of free
740
* VRAM now.
741
*/
742
if (!(adev->flags & AMD_IS_APU))
743
min_us = bytes_to_us(adev, free_vram / 4);
744
else
745
min_us = 0; /* Reset accum_us on APUs. */
746
747
adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
748
}
749
750
/* This is set to 0 if the driver is in debt to disallow (optional)
751
* buffer moves.
752
*/
753
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
754
755
/* Do the same for visible VRAM if half of it is free */
756
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
757
u64 total_vis_vram = adev->gmc.visible_vram_size;
758
u64 used_vis_vram =
759
amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
760
761
if (used_vis_vram < total_vis_vram) {
762
u64 free_vis_vram = total_vis_vram - used_vis_vram;
763
764
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
765
increment_us, us_upper_bound);
766
767
if (free_vis_vram >= total_vis_vram / 2)
768
adev->mm_stats.accum_us_vis =
769
max(bytes_to_us(adev, free_vis_vram / 2),
770
adev->mm_stats.accum_us_vis);
771
}
772
773
*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
774
} else {
775
*max_vis_bytes = 0;
776
}
777
778
spin_unlock(&adev->mm_stats.lock);
779
}
780
781
/* Report how many bytes have really been moved for the last command
782
* submission. This can result in a debt that can stop buffer migrations
783
* temporarily.
784
*/
785
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
786
u64 num_vis_bytes)
787
{
788
spin_lock(&adev->mm_stats.lock);
789
adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
790
adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
791
spin_unlock(&adev->mm_stats.lock);
792
}
793
794
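/* Per-BO validation callback: pick the preferred or merely allowed placement
 * depending on the remaining move budget and validate the BO there, falling
 * back to the allowed domains on -ENOMEM.
 */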
static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
795
{
796
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
797
struct amdgpu_cs_parser *p = param;
798
struct ttm_operation_ctx ctx = {
799
.interruptible = true,
800
.no_wait_gpu = false,
801
.resv = bo->tbo.base.resv
802
};
803
uint32_t domain;
804
int r;
805
806
if (bo->tbo.pin_count)
807
return 0;
808
809
/* Don't move this buffer if we have depleted our allowance
810
* to move it. Don't move anything if the threshold is zero.
811
*/
812
if (p->bytes_moved < p->bytes_moved_threshold &&
813
(!bo->tbo.base.dma_buf ||
814
list_empty(&bo->tbo.base.dma_buf->attachments))) {
815
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
816
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
817
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
818
* visible VRAM if we've depleted our allowance to do
819
* that.
820
*/
821
if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
822
domain = bo->preferred_domains;
823
else
824
domain = bo->allowed_domains;
825
} else {
826
domain = bo->preferred_domains;
827
}
828
} else {
829
domain = bo->allowed_domains;
830
}
831
832
retry:
833
amdgpu_bo_placement_from_domain(bo, domain);
834
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
835
836
p->bytes_moved += ctx.bytes_moved;
837
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
838
amdgpu_res_cpu_visible(adev, bo->tbo.resource))
839
p->bytes_moved_vis += ctx.bytes_moved;
840
841
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
842
domain = bo->allowed_domains;
843
goto retry;
844
}
845
846
return r;
847
}
848
849
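/* Look up or create the BO list, grab the userptr backing pages, lock every
 * BO with drm_exec and validate the BOs against the per-submission move
 * budget.
 */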
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
850
union drm_amdgpu_cs *cs)
851
{
852
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
853
struct ttm_operation_ctx ctx = { true, false };
854
struct amdgpu_vm *vm = &fpriv->vm;
855
struct amdgpu_bo_list_entry *e;
856
struct drm_gem_object *obj;
857
unsigned long index;
858
unsigned int i;
859
int r;
860
861
/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
862
if (cs->in.bo_list_handle) {
863
if (p->bo_list)
864
return -EINVAL;
865
866
r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
867
&p->bo_list);
868
if (r)
869
return r;
870
} else if (!p->bo_list) {
871
/* Create an empty bo_list when no handle is provided */
872
r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
873
&p->bo_list);
874
if (r)
875
return r;
876
}
877
878
mutex_lock(&p->bo_list->bo_list_mutex);
879
880
/* Get userptr backing pages. If pages are updated after being registered
881
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
882
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
883
*/
884
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
885
bool userpage_invalidated = false;
886
struct amdgpu_bo *bo = e->bo;
887
888
r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
889
if (r)
890
goto out_free_user_pages;
891
892
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
893
if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
894
userpage_invalidated = true;
895
break;
896
}
897
}
898
e->user_invalidated = userpage_invalidated;
899
}
900
901
drm_exec_until_all_locked(&p->exec) {
902
r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
903
drm_exec_retry_on_contention(&p->exec);
904
if (unlikely(r))
905
goto out_free_user_pages;
906
907
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
908
/* One fence for TTM and one for each CS job */
909
r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
910
1 + p->gang_size);
911
drm_exec_retry_on_contention(&p->exec);
912
if (unlikely(r))
913
goto out_free_user_pages;
914
915
e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
916
}
917
918
if (p->uf_bo) {
919
r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
920
1 + p->gang_size);
921
drm_exec_retry_on_contention(&p->exec);
922
if (unlikely(r))
923
goto out_free_user_pages;
924
}
925
}
926
927
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
928
struct mm_struct *usermm;
929
930
usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
931
if (usermm && usermm != current->mm) {
932
r = -EPERM;
933
goto out_free_user_pages;
934
}
935
936
if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
937
e->user_invalidated) {
938
amdgpu_bo_placement_from_domain(e->bo,
939
AMDGPU_GEM_DOMAIN_CPU);
940
r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
941
&ctx);
942
if (r)
943
goto out_free_user_pages;
944
945
amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
946
e->range);
947
}
948
}
949
950
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
951
&p->bytes_moved_vis_threshold);
952
p->bytes_moved = 0;
953
p->bytes_moved_vis = 0;
954
955
r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
956
amdgpu_cs_bo_validate, p);
957
if (r) {
958
drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
959
goto out_free_user_pages;
960
}
961
962
drm_exec_for_each_locked_object(&p->exec, index, obj) {
963
r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
964
if (unlikely(r))
965
goto out_free_user_pages;
966
}
967
968
if (p->uf_bo) {
969
r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
970
if (unlikely(r))
971
goto out_free_user_pages;
972
973
p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
974
}
975
976
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
977
p->bytes_moved_vis);
978
979
for (i = 0; i < p->gang_size; ++i)
980
amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
981
p->bo_list->gws_obj,
982
p->bo_list->oa_obj);
983
return 0;
984
985
out_free_user_pages:
986
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
987
struct amdgpu_bo *bo = e->bo;
988
989
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
990
e->range = NULL;
991
}
992
mutex_unlock(&p->bo_list->bo_list_mutex);
993
return r;
994
}
995
996
static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
997
{
998
int i, j;
999
1000
if (!trace_amdgpu_cs_enabled())
1001
return;
1002
1003
for (i = 0; i < p->gang_size; ++i) {
1004
struct amdgpu_job *job = p->jobs[i];
1005
1006
for (j = 0; j < job->num_ibs; ++j)
1007
trace_amdgpu_cs(p, job, &job->ibs[j]);
1008
}
1009
}
1010
1011
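/* For rings that still parse or patch IBs in software (UVD/VCE VM emulation),
 * map the IB's backing BO and let the ring specific code check or patch it.
 */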
static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
1012
struct amdgpu_job *job)
1013
{
1014
struct amdgpu_ring *ring = amdgpu_job_ring(job);
1015
unsigned int i;
1016
int r;
1017
1018
/* Only for UVD/VCE VM emulation */
1019
if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
1020
return 0;
1021
1022
for (i = 0; i < job->num_ibs; ++i) {
1023
struct amdgpu_ib *ib = &job->ibs[i];
1024
struct amdgpu_bo_va_mapping *m;
1025
struct amdgpu_bo *aobj;
1026
uint64_t va_start;
1027
uint8_t *kptr;
1028
1029
va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
1030
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
1031
if (r) {
1032
drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
1033
return r;
1034
}
1035
1036
if ((va_start + ib->length_dw * 4) >
1037
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
1038
drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
1039
return -EINVAL;
1040
}
1041
1042
/* the IB should be reserved at this point */
1043
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
1044
if (r)
1045
return r;
1046
1047
kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
1048
1049
if (ring->funcs->parse_cs) {
1050
memcpy(ib->ptr, kptr, ib->length_dw * 4);
1051
amdgpu_bo_kunmap(aobj);
1052
1053
r = amdgpu_ring_parse_cs(ring, p, job, ib);
1054
if (r)
1055
return r;
1056
1057
if (ib->sa_bo)
1058
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
1059
} else {
1060
ib->ptr = (uint32_t *)kptr;
1061
r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
1062
amdgpu_bo_kunmap(aobj);
1063
if (r)
1064
return r;
1065
}
1066
}
1067
1068
return 0;
1069
}
1070
1071
static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
1072
{
1073
unsigned int i;
1074
int r;
1075
1076
for (i = 0; i < p->gang_size; ++i) {
1077
r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
1078
if (r)
1079
return r;
1080
}
1081
return 0;
1082
}
1083
1084
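/* Update the page tables for all BOs used by the submission and make the
 * jobs wait for the resulting page table updates.
 */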
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
1085
{
1086
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1087
struct amdgpu_job *job = p->gang_leader;
1088
struct amdgpu_device *adev = p->adev;
1089
struct amdgpu_vm *vm = &fpriv->vm;
1090
struct amdgpu_bo_list_entry *e;
1091
struct amdgpu_bo_va *bo_va;
1092
unsigned int i;
1093
int r;
1094
1095
/*
1096
* We can't use gang submit together with reserved VMIDs when the VM changes
1097
* can't be invalidated by more than one engine at the same time.
1098
*/
1099
if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
1100
for (i = 0; i < p->gang_size; ++i) {
1101
struct drm_sched_entity *entity = p->entities[i];
1102
struct drm_gpu_scheduler *sched = entity->rq->sched;
1103
struct amdgpu_ring *ring = to_amdgpu_ring(sched);
1104
1105
if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
1106
return -EINVAL;
1107
}
1108
}
1109
1110
if (!amdgpu_vm_ready(vm))
1111
return -EINVAL;
1112
1113
r = amdgpu_vm_clear_freed(adev, vm, NULL);
1114
if (r)
1115
return r;
1116
1117
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
1118
if (r)
1119
return r;
1120
1121
r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
1122
GFP_KERNEL);
1123
if (r)
1124
return r;
1125
1126
if (fpriv->csa_va) {
1127
bo_va = fpriv->csa_va;
1128
BUG_ON(!bo_va);
1129
r = amdgpu_vm_bo_update(adev, bo_va, false);
1130
if (r)
1131
return r;
1132
1133
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
1134
GFP_KERNEL);
1135
if (r)
1136
return r;
1137
}
1138
1139
/* FIXME: In theory this loop shouldn't be needed any more when
1140
* amdgpu_vm_handle_moved handles all moved BOs that are reserved
1141
* with p->ticket. But removing it caused test regressions, so I'm
1142
* leaving it here for now.
1143
*/
1144
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
1145
bo_va = e->bo_va;
1146
if (bo_va == NULL)
1147
continue;
1148
1149
r = amdgpu_vm_bo_update(adev, bo_va, false);
1150
if (r)
1151
return r;
1152
1153
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
1154
GFP_KERNEL);
1155
if (r)
1156
return r;
1157
}
1158
1159
r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
1160
if (r)
1161
return r;
1162
1163
r = amdgpu_vm_update_pdes(adev, vm, false);
1164
if (r)
1165
return r;
1166
1167
r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
1168
if (r)
1169
return r;
1170
1171
for (i = 0; i < p->gang_size; ++i) {
1172
job = p->jobs[i];
1173
1174
if (!job->vm)
1175
continue;
1176
1177
job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
1178
}
1179
1180
if (adev->debug_vm) {
1181
/* Invalidate all BOs to test for userspace bugs */
1182
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
1183
struct amdgpu_bo *bo = e->bo;
1184
1185
/* ignore duplicates */
1186
if (!bo)
1187
continue;
1188
1189
amdgpu_vm_bo_invalidate(bo, false);
1190
}
1191
}
1192
1193
return 0;
1194
}
1195
1196
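/* Wait for the previous submission on the context, then collect the implicit
 * (reservation object) and explicit fences the jobs have to wait for before
 * they can run.
 */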
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
1197
{
1198
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1199
struct drm_gpu_scheduler *sched;
1200
struct drm_gem_object *obj;
1201
struct dma_fence *fence;
1202
unsigned long index;
1203
unsigned int i;
1204
int r;
1205
1206
r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
1207
if (r) {
1208
if (r != -ERESTARTSYS)
1209
drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
1210
return r;
1211
}
1212
1213
drm_exec_for_each_locked_object(&p->exec, index, obj) {
1214
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
1215
1216
struct dma_resv *resv = bo->tbo.base.resv;
1217
enum amdgpu_sync_mode sync_mode;
1218
1219
sync_mode = amdgpu_bo_explicit_sync(bo) ?
1220
AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
1221
r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
1222
&fpriv->vm);
1223
if (r)
1224
return r;
1225
}
1226
1227
for (i = 0; i < p->gang_size; ++i) {
1228
r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
1229
if (r)
1230
return r;
1231
}
1232
1233
sched = p->gang_leader->base.entity->rq->sched;
1234
while ((fence = amdgpu_sync_get_fence(&p->sync))) {
1235
struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
1236
1237
/*
1238
* When we have a dependency it might be necessary to insert a
1239
* pipeline sync to make sure that all caches etc are flushed and the
1240
* next job actually sees the results from the previous one
1241
* before we start executing on the same scheduler ring.
1242
*/
1243
if (!s_fence || s_fence->sched != sched) {
1244
dma_fence_put(fence);
1245
continue;
1246
}
1247
1248
r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
1249
GFP_KERNEL);
1250
dma_fence_put(fence);
1251
if (r)
1252
return r;
1253
}
1254
return 0;
1255
}
1256
1257
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1258
{
1259
int i;
1260
1261
for (i = 0; i < p->num_post_deps; ++i) {
1262
if (p->post_deps[i].chain && p->post_deps[i].point) {
1263
drm_syncobj_add_point(p->post_deps[i].syncobj,
1264
p->post_deps[i].chain,
1265
p->fence, p->post_deps[i].point);
1266
p->post_deps[i].chain = NULL;
1267
} else {
1268
drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1269
p->fence);
1270
}
1271
}
1272
}
1273
1274
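/* Arm the scheduler jobs, re-check the userptrs under the notifier lock, add
 * the resulting fences to all locked BOs and push the jobs to their entities.
 */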
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1275
union drm_amdgpu_cs *cs)
1276
{
1277
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1278
struct amdgpu_job *leader = p->gang_leader;
1279
struct amdgpu_bo_list_entry *e;
1280
struct drm_gem_object *gobj;
1281
unsigned long index;
1282
unsigned int i;
1283
uint64_t seq;
1284
int r;
1285
1286
for (i = 0; i < p->gang_size; ++i)
1287
drm_sched_job_arm(&p->jobs[i]->base);
1288
1289
for (i = 0; i < p->gang_size; ++i) {
1290
struct dma_fence *fence;
1291
1292
if (p->jobs[i] == leader)
1293
continue;
1294
1295
fence = &p->jobs[i]->base.s_fence->scheduled;
1296
dma_fence_get(fence);
1297
r = drm_sched_job_add_dependency(&leader->base, fence);
1298
if (r) {
1299
dma_fence_put(fence);
1300
return r;
1301
}
1302
}
1303
1304
if (p->gang_size > 1) {
1305
for (i = 0; i < p->gang_size; ++i)
1306
amdgpu_job_set_gang_leader(p->jobs[i], leader);
1307
}
1308
1309
/* No memory allocation is allowed while holding the notifier lock.
1310
* The lock is held until amdgpu_cs_submit is finished and the fence is
1311
* added to BOs.
1312
*/
1313
mutex_lock(&p->adev->notifier_lock);
1314
1315
/* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
1316
* -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
1317
*/
1318
r = 0;
1319
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1320
r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
1321
e->range);
1322
e->range = NULL;
1323
}
1324
if (r) {
1325
r = -EAGAIN;
1326
mutex_unlock(&p->adev->notifier_lock);
1327
return r;
1328
}
1329
1330
p->fence = dma_fence_get(&leader->base.s_fence->finished);
1331
drm_exec_for_each_locked_object(&p->exec, index, gobj) {
1332
1333
ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
1334
1335
/* Everybody except for the gang leader uses READ */
1336
for (i = 0; i < p->gang_size; ++i) {
1337
if (p->jobs[i] == leader)
1338
continue;
1339
1340
dma_resv_add_fence(gobj->resv,
1341
&p->jobs[i]->base.s_fence->finished,
1342
DMA_RESV_USAGE_READ);
1343
}
1344
1345
/* The gang leader is remembered as the writer */
1346
dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
1347
}
1348
1349
seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
1350
p->fence);
1351
amdgpu_cs_post_dependencies(p);
1352
1353
if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1354
!p->ctx->preamble_presented) {
1355
leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1356
p->ctx->preamble_presented = true;
1357
}
1358
1359
cs->out.handle = seq;
1360
leader->uf_sequence = seq;
1361
1362
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
1363
for (i = 0; i < p->gang_size; ++i) {
1364
amdgpu_job_free_resources(p->jobs[i]);
1365
trace_amdgpu_cs_ioctl(p->jobs[i]);
1366
drm_sched_entity_push_job(&p->jobs[i]->base);
1367
p->jobs[i] = NULL;
1368
}
1369
1370
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1371
1372
mutex_unlock(&p->adev->notifier_lock);
1373
mutex_unlock(&p->bo_list->bo_list_mutex);
1374
return 0;
1375
}
1376
1377
/* Clean up the parser structure */
1378
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
1379
{
1380
unsigned int i;
1381
1382
amdgpu_sync_free(&parser->sync);
1383
drm_exec_fini(&parser->exec);
1384
1385
for (i = 0; i < parser->num_post_deps; i++) {
1386
drm_syncobj_put(parser->post_deps[i].syncobj);
1387
kfree(parser->post_deps[i].chain);
1388
}
1389
kfree(parser->post_deps);
1390
1391
dma_fence_put(parser->fence);
1392
1393
if (parser->ctx)
1394
amdgpu_ctx_put(parser->ctx);
1395
if (parser->bo_list)
1396
amdgpu_bo_list_put(parser->bo_list);
1397
1398
for (i = 0; i < parser->nchunks; i++)
1399
kvfree(parser->chunks[i].kdata);
1400
kvfree(parser->chunks);
1401
for (i = 0; i < parser->gang_size; ++i) {
1402
if (parser->jobs[i])
1403
amdgpu_job_free(parser->jobs[i]);
1404
}
1405
amdgpu_bo_unref(&parser->uf_bo);
1406
}
1407
1408
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1409
{
1410
struct amdgpu_device *adev = drm_to_adev(dev);
1411
struct amdgpu_cs_parser parser;
1412
int r;
1413
1414
if (amdgpu_ras_intr_triggered())
1415
return -EHWPOISON;
1416
1417
if (!adev->accel_working)
1418
return -EBUSY;
1419
1420
r = amdgpu_cs_parser_init(&parser, adev, filp, data);
1421
if (r) {
1422
drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
1423
return r;
1424
}
1425
1426
r = amdgpu_cs_pass1(&parser, data);
1427
if (r)
1428
goto error_fini;
1429
1430
r = amdgpu_cs_pass2(&parser);
1431
if (r)
1432
goto error_fini;
1433
1434
r = amdgpu_cs_parser_bos(&parser, data);
1435
if (r) {
1436
if (r == -ENOMEM)
1437
drm_err(dev, "Not enough memory for command submission!\n");
1438
else if (r != -ERESTARTSYS && r != -EAGAIN)
1439
drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
1440
goto error_fini;
1441
}
1442
1443
r = amdgpu_cs_patch_jobs(&parser);
1444
if (r)
1445
goto error_backoff;
1446
1447
r = amdgpu_cs_vm_handling(&parser);
1448
if (r)
1449
goto error_backoff;
1450
1451
r = amdgpu_cs_sync_rings(&parser);
1452
if (r)
1453
goto error_backoff;
1454
1455
trace_amdgpu_cs_ibs(&parser);
1456
1457
r = amdgpu_cs_submit(&parser, data);
1458
if (r)
1459
goto error_backoff;
1460
1461
amdgpu_cs_parser_fini(&parser);
1462
return 0;
1463
1464
error_backoff:
1465
mutex_unlock(&parser.bo_list->bo_list_mutex);
1466
1467
error_fini:
1468
amdgpu_cs_parser_fini(&parser);
1469
return r;
1470
}
1471
1472
/**
1473
* amdgpu_cs_wait_ioctl - wait for a command submission to finish
1474
*
1475
* @dev: drm device
1476
* @data: data from userspace
1477
* @filp: file private
1478
*
1479
* Wait for the command submission identified by handle to finish.
1480
*/
1481
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1482
struct drm_file *filp)
1483
{
1484
union drm_amdgpu_wait_cs *wait = data;
1485
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1486
struct drm_sched_entity *entity;
1487
struct amdgpu_ctx *ctx;
1488
struct dma_fence *fence;
1489
long r;
1490
1491
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1492
if (ctx == NULL)
1493
return -EINVAL;
1494
1495
r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1496
wait->in.ring, &entity);
1497
if (r) {
1498
amdgpu_ctx_put(ctx);
1499
return r;
1500
}
1501
1502
fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1503
if (IS_ERR(fence))
1504
r = PTR_ERR(fence);
1505
else if (fence) {
1506
r = dma_fence_wait_timeout(fence, true, timeout);
1507
if (r > 0 && fence->error)
1508
r = fence->error;
1509
dma_fence_put(fence);
1510
} else
1511
r = 1;
1512
1513
amdgpu_ctx_put(ctx);
1514
if (r < 0)
1515
return r;
1516
1517
memset(wait, 0, sizeof(*wait));
1518
wait->out.status = (r == 0);
1519
1520
return 0;
1521
}
1522
1523
/**
1524
* amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1525
*
1526
* @adev: amdgpu device
1527
* @filp: file private
1528
* @user: drm_amdgpu_fence copied from user space
1529
*/
1530
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1531
struct drm_file *filp,
1532
struct drm_amdgpu_fence *user)
1533
{
1534
struct drm_sched_entity *entity;
1535
struct amdgpu_ctx *ctx;
1536
struct dma_fence *fence;
1537
int r;
1538
1539
ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1540
if (ctx == NULL)
1541
return ERR_PTR(-EINVAL);
1542
1543
r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1544
user->ring, &entity);
1545
if (r) {
1546
amdgpu_ctx_put(ctx);
1547
return ERR_PTR(r);
1548
}
1549
1550
fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1551
amdgpu_ctx_put(ctx);
1552
1553
return fence;
1554
}
1555
1556
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1557
struct drm_file *filp)
1558
{
1559
struct amdgpu_device *adev = drm_to_adev(dev);
1560
union drm_amdgpu_fence_to_handle *info = data;
1561
struct dma_fence *fence;
1562
struct drm_syncobj *syncobj;
1563
struct sync_file *sync_file;
1564
int fd, r;
1565
1566
fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1567
if (IS_ERR(fence))
1568
return PTR_ERR(fence);
1569
1570
if (!fence)
1571
fence = dma_fence_get_stub();
1572
1573
switch (info->in.what) {
1574
case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1575
r = drm_syncobj_create(&syncobj, 0, fence);
1576
dma_fence_put(fence);
1577
if (r)
1578
return r;
1579
r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1580
drm_syncobj_put(syncobj);
1581
return r;
1582
1583
case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1584
r = drm_syncobj_create(&syncobj, 0, fence);
1585
dma_fence_put(fence);
1586
if (r)
1587
return r;
1588
r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
1589
drm_syncobj_put(syncobj);
1590
return r;
1591
1592
case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1593
fd = get_unused_fd_flags(O_CLOEXEC);
1594
if (fd < 0) {
1595
dma_fence_put(fence);
1596
return fd;
1597
}
1598
1599
sync_file = sync_file_create(fence);
1600
dma_fence_put(fence);
1601
if (!sync_file) {
1602
put_unused_fd(fd);
1603
return -ENOMEM;
1604
}
1605
1606
fd_install(fd, sync_file->file);
1607
info->out.handle = fd;
1608
return 0;
1609
1610
default:
1611
dma_fence_put(fence);
1612
return -EINVAL;
1613
}
1614
}
1615
1616
/**
1617
* amdgpu_cs_wait_all_fences - wait on all fences to signal
1618
*
1619
* @adev: amdgpu device
1620
* @filp: file private
1621
* @wait: wait parameters
1622
* @fences: array of drm_amdgpu_fence
1623
*/
1624
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1625
struct drm_file *filp,
1626
union drm_amdgpu_wait_fences *wait,
1627
struct drm_amdgpu_fence *fences)
1628
{
1629
uint32_t fence_count = wait->in.fence_count;
1630
unsigned int i;
1631
long r = 1;
1632
1633
for (i = 0; i < fence_count; i++) {
1634
struct dma_fence *fence;
1635
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1636
1637
fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1638
if (IS_ERR(fence))
1639
return PTR_ERR(fence);
1640
else if (!fence)
1641
continue;
1642
1643
r = dma_fence_wait_timeout(fence, true, timeout);
1644
if (r > 0 && fence->error)
1645
r = fence->error;
1646
1647
dma_fence_put(fence);
1648
if (r < 0)
1649
return r;
1650
1651
if (r == 0)
1652
break;
1653
}
1654
1655
memset(wait, 0, sizeof(*wait));
1656
wait->out.status = (r > 0);
1657
1658
return 0;
1659
}
1660
1661
/**
1662
* amdgpu_cs_wait_any_fence - wait on any fence to signal
1663
*
1664
* @adev: amdgpu device
1665
* @filp: file private
1666
* @wait: wait parameters
1667
* @fences: array of drm_amdgpu_fence
1668
*/
1669
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1670
struct drm_file *filp,
1671
union drm_amdgpu_wait_fences *wait,
1672
struct drm_amdgpu_fence *fences)
1673
{
1674
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1675
uint32_t fence_count = wait->in.fence_count;
1676
uint32_t first = ~0;
1677
struct dma_fence **array;
1678
unsigned int i;
1679
long r;
1680
1681
/* Prepare the fence array */
1682
array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1683
1684
if (array == NULL)
1685
return -ENOMEM;
1686
1687
for (i = 0; i < fence_count; i++) {
1688
struct dma_fence *fence;
1689
1690
fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1691
if (IS_ERR(fence)) {
1692
r = PTR_ERR(fence);
1693
goto err_free_fence_array;
1694
} else if (fence) {
1695
array[i] = fence;
1696
} else { /* NULL, the fence has been already signaled */
1697
r = 1;
1698
first = i;
1699
goto out;
1700
}
1701
}
1702
1703
r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1704
&first);
1705
if (r < 0)
1706
goto err_free_fence_array;
1707
1708
out:
1709
memset(wait, 0, sizeof(*wait));
1710
wait->out.status = (r > 0);
1711
wait->out.first_signaled = first;
1712
1713
if (first < fence_count && array[first])
1714
r = array[first]->error;
1715
else
1716
r = 0;
1717
1718
err_free_fence_array:
1719
for (i = 0; i < fence_count; i++)
1720
dma_fence_put(array[i]);
1721
kfree(array);
1722
1723
return r;
1724
}
1725
1726
/**
1727
* amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1728
*
1729
* @dev: drm device
1730
* @data: data from userspace
1731
* @filp: file private
1732
*/
1733
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1734
struct drm_file *filp)
1735
{
1736
struct amdgpu_device *adev = drm_to_adev(dev);
1737
union drm_amdgpu_wait_fences *wait = data;
1738
struct drm_amdgpu_fence *fences;
1739
int r;
1740
1741
/* Get the fences from userspace */
1742
fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
1743
wait->in.fence_count,
1744
sizeof(struct drm_amdgpu_fence));
1745
if (IS_ERR(fences))
1746
return PTR_ERR(fences);
1747
1748
if (wait->in.wait_all)
1749
r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1750
else
1751
r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1752
1753
kfree(fences);
1754
1755
return r;
1756
}
1757
1758
/**
1759
* amdgpu_cs_find_mapping - find bo_va for VM address
1760
*
1761
* @parser: command submission parser context
1762
* @addr: VM address
1763
* @bo: resulting BO of the mapping found
1764
* @map: Placeholder to return found BO mapping
1765
*
1766
* Search the buffer objects in the command submission context for a certain
1767
* virtual memory address. Returns 0 and fills in @bo and @map when the
1768
* mapping is found, a negative error code otherwise.
1769
*/
1770
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1771
uint64_t addr, struct amdgpu_bo **bo,
1772
struct amdgpu_bo_va_mapping **map)
1773
{
1774
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1775
struct ttm_operation_ctx ctx = { false, false };
1776
struct amdgpu_vm *vm = &fpriv->vm;
1777
struct amdgpu_bo_va_mapping *mapping;
1778
int i, r;
1779
1780
addr /= AMDGPU_GPU_PAGE_SIZE;
1781
1782
mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1783
if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1784
return -EINVAL;
1785
1786
*bo = mapping->bo_va->base.bo;
1787
*map = mapping;
1788
1789
/* Double check that the BO is reserved by this CS */
1790
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
1791
return -EINVAL;
1792
1793
/* Make sure VRAM is allocated contiguously */
1794
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1795
if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
1796
!((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
1797
1798
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1799
for (i = 0; i < (*bo)->placement.num_placement; i++)
1800
(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
1801
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1802
if (r)
1803
return r;
1804
}
1805
1806
return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1807
}
1808
1809