GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/rocket/rocket_job.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
/* Copyright 2019 Collabora ltd. */
/* Copyright 2024-2025 Tomeu Vizoso <[email protected]> */

#include <drm/drm_print.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/rocket_accel.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include "rocket_core.h"
#include "rocket_device.h"
#include "rocket_drv.h"
#include "rocket_job.h"
#include "rocket_registers.h"

#define JOB_TIMEOUT_MS 500

static struct rocket_job *
to_rocket_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct rocket_job, base);
}

static const char *rocket_fence_get_driver_name(struct dma_fence *fence)
{
        return "rocket";
}

static const char *rocket_fence_get_timeline_name(struct dma_fence *fence)
{
        return "rockchip-npu";
}

static const struct dma_fence_ops rocket_fence_ops = {
        .get_driver_name = rocket_fence_get_driver_name,
        .get_timeline_name = rocket_fence_get_timeline_name,
};

static struct dma_fence *rocket_fence_create(struct rocket_core *core)
{
        struct dma_fence *fence;

        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return ERR_PTR(-ENOMEM);

        dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock,
                       core->fence_context, ++core->emit_seqno);

        return fence;
}

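/*
 * Copy the userspace task descriptors into the kernel job, validating
 * task_struct_size and each task's regcmd_count along the way.
 */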
static int
rocket_copy_tasks(struct drm_device *dev,
                  struct drm_file *file_priv,
                  struct drm_rocket_job *job,
                  struct rocket_job *rjob)
{
        int ret = 0;

        if (job->task_struct_size < sizeof(struct drm_rocket_task))
                return -EINVAL;

        rjob->task_count = job->task_count;

        if (!rjob->task_count)
                return 0;

        rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL);
        if (!rjob->tasks) {
                drm_dbg(dev, "Failed to allocate task array\n");
                return -ENOMEM;
        }

        for (int i = 0; i < rjob->task_count; i++) {
                struct drm_rocket_task task = {0};

                if (copy_from_user(&task,
                                   u64_to_user_ptr(job->tasks) + i * job->task_struct_size,
                                   sizeof(task))) {
                        drm_dbg(dev, "Failed to copy incoming tasks\n");
                        ret = -EFAULT;
                        goto fail;
                }

                if (task.regcmd_count == 0) {
                        drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n");
                        ret = -EINVAL;
                        goto fail;
                }

                rjob->tasks[i].regcmd = task.regcmd;
                rjob->tasks[i].regcmd_count = task.regcmd_count;
        }

        return 0;

fail:
        kvfree(rjob->tasks);
        return ret;
}

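/*
 * Program the core's registers for the job's next task and kick off
 * execution, unless a reset is pending.
 */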
static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job)
{
        struct rocket_task *task;
        unsigned int extra_bit;

        /* Don't queue the job if a reset is in progress */
        if (atomic_read(&core->reset.pending))
                return;

        /* GO ! */

        task = &job->tasks[job->next_task_idx];
        job->next_task_idx++;

        rocket_pc_writel(core, BASE_ADDRESS, 0x1);

        /* From rknpu, in the TRM this bit is marked as reserved */
        extra_bit = 0x10000000 * core->index;
        rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) |
                                           CNA_S_POINTER_EXECUTER_PP_EN(1) |
                                           CNA_S_POINTER_POINTER_PP_MODE(1) |
                                           extra_bit);

        rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) |
                                            CORE_S_POINTER_EXECUTER_PP_EN(1) |
                                            CORE_S_POINTER_POINTER_PP_MODE(1) |
                                            extra_bit);

        rocket_pc_writel(core, BASE_ADDRESS, task->regcmd);
        rocket_pc_writel(core, REGISTER_AMOUNTS,
                         PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1));

        rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
        rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);

        rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) |
                                         PC_TASK_CON_TASK_COUNT_CLEAR(1) |
                                         PC_TASK_CON_TASK_NUMBER(1) |
                                         PC_TASK_CON_TASK_PP_EN(1));

        rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0));

        rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1));

        dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d", task->regcmd, core->index);
}

static int rocket_acquire_object_fences(struct drm_gem_object **bos,
                                        int bo_count,
                                        struct drm_sched_job *job,
                                        bool is_write)
{
        int i, ret;

        for (i = 0; i < bo_count; i++) {
                ret = dma_resv_reserve_fences(bos[i]->resv, 1);
                if (ret)
                        return ret;

                ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
                                                              is_write);
                if (ret)
                        return ret;
        }

        return 0;
}

static void rocket_attach_object_fences(struct drm_gem_object **bos,
                                        int bo_count,
                                        struct dma_fence *fence)
{
        int i;

        for (i = 0; i < bo_count; i++)
                dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

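/*
 * Lock the reservations of all input and output BOs, arm the scheduler
 * job, set up implicit fencing and hand the job over to the scheduler.
 */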
static int rocket_job_push(struct rocket_job *job)
{
        struct rocket_device *rdev = job->rdev;
        struct drm_gem_object **bos;
        struct ww_acquire_ctx acquire_ctx;
        int ret = 0;

        bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
                             GFP_KERNEL);
        memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
        memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));

        ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
        if (ret)
                goto err;

        scoped_guard(mutex, &rdev->sched_lock) {
                drm_sched_job_arm(&job->base);

                job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);

                ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
                if (ret)
                        goto err_unlock;

                ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
                if (ret)
                        goto err_unlock;

                kref_get(&job->refcount); /* put by scheduler job completion */

                drm_sched_entity_push_job(&job->base);
        }

        rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);

err_unlock:
        drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
err:
        kvfree(bos);

        return ret;
}

static void rocket_job_cleanup(struct kref *ref)
{
        struct rocket_job *job = container_of(ref, struct rocket_job,
                                              refcount);
        unsigned int i;

        rocket_iommu_domain_put(job->domain);

        dma_fence_put(job->done_fence);
        dma_fence_put(job->inference_done_fence);

        if (job->in_bos) {
                for (i = 0; i < job->in_bo_count; i++)
                        drm_gem_object_put(job->in_bos[i]);

                kvfree(job->in_bos);
        }

        if (job->out_bos) {
                for (i = 0; i < job->out_bo_count; i++)
                        drm_gem_object_put(job->out_bos[i]);

                kvfree(job->out_bos);
        }

        kvfree(job->tasks);

        kfree(job);
}

static void rocket_job_put(struct rocket_job *job)
{
        kref_put(&job->refcount, rocket_job_cleanup);
}

static void rocket_job_free(struct drm_sched_job *sched_job)
{
        struct rocket_job *job = to_rocket_job(sched_job);

        drm_sched_job_cleanup(sched_job);

        rocket_job_put(job);
}

static struct rocket_core *sched_to_core(struct rocket_device *rdev,
                                         struct drm_gpu_scheduler *sched)
{
        unsigned int core;

        for (core = 0; core < rdev->num_cores; core++) {
                if (&rdev->cores[core].sched == sched)
                        return &rdev->cores[core];
        }

        return NULL;
}

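/*
 * drm_sched run_job callback: create the hardware fence, power up the
 * core, attach the job's IOMMU domain and submit the next task.
 */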
static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
{
        struct rocket_job *job = to_rocket_job(sched_job);
        struct rocket_device *rdev = job->rdev;
        struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
        struct dma_fence *fence = NULL;
        int ret;

        if (unlikely(job->base.s_fence->finished.error))
                return NULL;

        /*
         * Nothing to execute: can happen if the job has finished while
         * we were resetting the NPU.
         */
        if (job->next_task_idx == job->task_count)
                return NULL;

        fence = rocket_fence_create(core);
        if (IS_ERR(fence))
                return fence;

        if (job->done_fence)
                dma_fence_put(job->done_fence);
        job->done_fence = dma_fence_get(fence);

        ret = pm_runtime_get_sync(core->dev);
        if (ret < 0)
                return fence;

        ret = iommu_attach_group(job->domain->domain, core->iommu_group);
        if (ret < 0)
                return fence;

        scoped_guard(mutex, &core->job_lock) {
                core->in_flight_job = job;
                rocket_job_hw_submit(core, job);
        }

        return fence;
}

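/*
 * Threaded IRQ work: acknowledge the interrupt, then either submit the
 * in-flight job's next task or signal completion and release the core.
 */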
static void rocket_job_handle_irq(struct rocket_core *core)
{
        pm_runtime_mark_last_busy(core->dev);

        rocket_pc_writel(core, OPERATION_ENABLE, 0x0);
        rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff);

        scoped_guard(mutex, &core->job_lock)
                if (core->in_flight_job) {
                        if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) {
                                rocket_job_hw_submit(core, core->in_flight_job);
                                return;
                        }

                        iommu_detach_group(NULL, iommu_group_get(core->dev));
                        dma_fence_signal(core->in_flight_job->done_fence);
                        pm_runtime_put_autosuspend(core->dev);
                        core->in_flight_job = NULL;
                }
}

static void
rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
{
        if (!atomic_read(&core->reset.pending))
                return;

        drm_sched_stop(&core->sched, bad);

        /*
         * Remaining interrupts have been handled, but we might still have
         * stuck jobs. Let's make sure the PM counters stay balanced by
         * manually calling pm_runtime_put_noidle().
         */
        scoped_guard(mutex, &core->job_lock) {
                if (core->in_flight_job)
                        pm_runtime_put_noidle(core->dev);

                iommu_detach_group(NULL, core->iommu_group);

                core->in_flight_job = NULL;
        }

        /* Proceed with reset now. */
        rocket_core_reset(core);

        /* NPU has been reset, we can clear the reset pending bit. */
        atomic_set(&core->reset.pending, 0);

        /* Restart the scheduler */
        drm_sched_start(&core->sched, 0);
}

static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job)
{
        struct rocket_job *job = to_rocket_job(sched_job);
        struct rocket_device *rdev = job->rdev;
        struct rocket_core *core = sched_to_core(rdev, sched_job->sched);

        dev_err(core->dev, "NPU job timed out");

        atomic_set(&core->reset.pending, 1);
        rocket_reset(core, sched_job);

        return DRM_GPU_SCHED_STAT_RESET;
}

static void rocket_reset_work(struct work_struct *work)
{
        struct rocket_core *core;

        core = container_of(work, struct rocket_core, reset.work);
        rocket_reset(core, NULL);
}

static const struct drm_sched_backend_ops rocket_sched_ops = {
        .run_job = rocket_job_run,
        .timedout_job = rocket_job_timedout,
        .free_job = rocket_job_free
};

static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data)
{
        struct rocket_core *core = data;

        rocket_job_handle_irq(core);

        return IRQ_HANDLED;
}

static irqreturn_t rocket_job_irq_handler(int irq, void *data)
{
        struct rocket_core *core = data;
        u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS);

        WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
        WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR);

        if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
              raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
                return IRQ_NONE;

        rocket_pc_writel(core, INTERRUPT_MASK, 0x0);

        return IRQ_WAKE_THREAD;
}

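/*
 * Per-core job setup: reset work, locks, job IRQ, reset workqueue,
 * fence context and DRM GPU scheduler.
 */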
int rocket_job_init(struct rocket_core *core)
{
        struct drm_sched_init_args args = {
                .ops = &rocket_sched_ops,
                .num_rqs = DRM_SCHED_PRIORITY_COUNT,
                .credit_limit = 1,
                .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
                .name = dev_name(core->dev),
                .dev = core->dev,
        };
        int ret;

        INIT_WORK(&core->reset.work, rocket_reset_work);
        spin_lock_init(&core->fence_lock);
        mutex_init(&core->job_lock);

        core->irq = platform_get_irq(to_platform_device(core->dev), 0);
        if (core->irq < 0)
                return core->irq;

        ret = devm_request_threaded_irq(core->dev, core->irq,
                                        rocket_job_irq_handler,
                                        rocket_job_irq_handler_thread,
                                        IRQF_SHARED, dev_name(core->dev),
                                        core);
        if (ret) {
                dev_err(core->dev, "failed to request job irq");
                return ret;
        }

        core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
        if (!core->reset.wq)
                return -ENOMEM;

        core->fence_context = dma_fence_context_alloc(1);

        args.timeout_wq = core->reset.wq;
        ret = drm_sched_init(&core->sched, &args);
        if (ret) {
                dev_err(core->dev, "Failed to create scheduler: %d.", ret);
                goto err_sched;
        }

        return 0;

err_sched:
        drm_sched_fini(&core->sched);

        destroy_workqueue(core->reset.wq);
        return ret;
}

void rocket_job_fini(struct rocket_core *core)
{
        drm_sched_fini(&core->sched);

        cancel_work_sync(&core->reset.work);
        destroy_workqueue(core->reset.wq);
}

int rocket_job_open(struct rocket_file_priv *rocket_priv)
{
        struct rocket_device *rdev = rocket_priv->rdev;
        struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores,
                                                          sizeof(*scheds),
                                                          GFP_KERNEL);
        unsigned int core;
        int ret;

        for (core = 0; core < rdev->num_cores; core++)
                scheds[core] = &rdev->cores[core].sched;

        ret = drm_sched_entity_init(&rocket_priv->sched_entity,
                                    DRM_SCHED_PRIORITY_NORMAL,
                                    scheds,
                                    rdev->num_cores, NULL);
        if (WARN_ON(ret))
                return ret;

        return 0;
}

void rocket_job_close(struct rocket_file_priv *rocket_priv)
{
        struct drm_sched_entity *entity = &rocket_priv->sched_entity;

        kfree(entity->sched_list);
        drm_sched_entity_destroy(entity);
}

int rocket_job_is_idle(struct rocket_core *core)
{
        /* If there are any jobs in this HW queue, we're not idle */
        if (atomic_read(&core->sched.credit_count))
                return false;

        return true;
}

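/*
 * Build a rocket_job from one userspace drm_rocket_job: copy the tasks,
 * look up the BO handles and push the job to the scheduler.
 */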
static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
                                   struct drm_rocket_job *job)
{
        struct rocket_device *rdev = to_rocket_device(dev);
        struct rocket_file_priv *file_priv = file->driver_priv;
        struct rocket_job *rjob = NULL;
        int ret = 0;

        if (job->task_count == 0)
                return -EINVAL;

        rjob = kzalloc(sizeof(*rjob), GFP_KERNEL);
        if (!rjob)
                return -ENOMEM;

        kref_init(&rjob->refcount);

        rjob->rdev = rdev;

        ret = drm_sched_job_init(&rjob->base,
                                 &file_priv->sched_entity,
                                 1, NULL, file->client_id);
        if (ret)
                goto out_put_job;

        ret = rocket_copy_tasks(dev, file, job, rjob);
        if (ret)
                goto out_cleanup_job;

        ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles),
                                     job->in_bo_handle_count, &rjob->in_bos);
        if (ret)
                goto out_cleanup_job;

        rjob->in_bo_count = job->in_bo_handle_count;

        ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles),
                                     job->out_bo_handle_count, &rjob->out_bos);
        if (ret)
                goto out_cleanup_job;

        rjob->out_bo_count = job->out_bo_handle_count;

        rjob->domain = rocket_iommu_domain_get(file_priv);

        ret = rocket_job_push(rjob);
        if (ret)
                goto out_cleanup_job;

out_cleanup_job:
        if (ret)
                drm_sched_job_cleanup(&rjob->base);
out_put_job:
        rocket_job_put(rjob);

        return ret;
}

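/*
 * SUBMIT ioctl: validate the incoming drm_rocket_submit, copy the job
 * array from userspace and submit each job in turn.
 */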
int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct drm_rocket_submit *args = data;
        struct drm_rocket_job *jobs;
        int ret = 0;
        unsigned int i = 0;

        if (args->job_count == 0)
                return 0;

        if (args->job_struct_size < sizeof(struct drm_rocket_job)) {
                drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n");
                return -EINVAL;
        }

        if (args->reserved != 0) {
                drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n");
                return -EINVAL;
        }

        jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
        if (!jobs) {
                drm_dbg(dev, "Failed to allocate incoming job array\n");
                return -ENOMEM;
        }

        for (i = 0; i < args->job_count; i++) {
                if (copy_from_user(&jobs[i],
                                   u64_to_user_ptr(args->jobs) + i * args->job_struct_size,
                                   sizeof(*jobs))) {
                        ret = -EFAULT;
                        drm_dbg(dev, "Failed to copy incoming job array\n");
                        goto exit;
                }
        }

        for (i = 0; i < args->job_count; i++)
                rocket_ioctl_submit_job(dev, file, &jobs[i]);

exit:
        kvfree(jobs);

        return ret;
}