GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/list.h>
#include "amdgpu.h"

static const guid_t MCE = CPER_NOTIFY_MCE;
static const guid_t CMC = CPER_NOTIFY_CMC;
static const guid_t BOOT = BOOT_TYPE;

static const guid_t CRASHDUMP = AMD_CRASHDUMP;
static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;

static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
{
	hdr->record_length += size;
}

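/* Fill a CPER timestamp from the current wall-clock time (UTC). */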
static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
{
	struct tm tm;
	time64_t now = ktime_get_real_seconds();

	time64_to_tm(now, 0, &tm);
	timestamp->seconds = tm.tm_sec;
	timestamp->minutes = tm.tm_min;
	timestamp->hours = tm.tm_hour;
	timestamp->flag = 0;
	timestamp->day = tm.tm_mday;
	timestamp->month = 1 + tm.tm_mon;
	timestamp->year = (1900 + tm.tm_year) % 100;
	timestamp->century = (1900 + tm.tm_year) / 100;
}

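/*
 * Fill the common CPER record header: "CPER" signature, revision, severity,
 * timestamp, record/platform/creator IDs, and the notification type derived
 * from the record type (BOOT, MCE or CMC).
 */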
void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
				struct cper_hdr *hdr,
				enum amdgpu_cper_type type,
				enum cper_error_severity sev)
{
	char record_id[16];

	hdr->signature[0] = 'C';
	hdr->signature[1] = 'P';
	hdr->signature[2] = 'E';
	hdr->signature[3] = 'R';
	hdr->revision = CPER_HDR_REV_1;
	hdr->signature_end = 0xFFFFFFFF;
	hdr->error_severity = sev;

	hdr->valid_bits.platform_id = 1;
	hdr->valid_bits.timestamp = 1;

	amdgpu_cper_get_timestamp(&hdr->timestamp);

	snprintf(record_id, 9, "%d:%X",
		 (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			 adev->smuio.funcs->get_socket_id(adev) :
			 0,
		 atomic_inc_return(&adev->cper.unique_id));
	memcpy(hdr->record_id, record_id, 8);

	snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
		 adev->pdev->vendor, adev->pdev->device);
	/* pmfw version should be part of creator_id according to CPER spec */
	snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);

	switch (type) {
	case AMDGPU_CPER_TYPE_BOOT:
		hdr->notify_type = BOOT;
		break;
	case AMDGPU_CPER_TYPE_FATAL:
	case AMDGPU_CPER_TYPE_BP_THRESHOLD:
		hdr->notify_type = MCE;
		break;
	case AMDGPU_CPER_TYPE_RUNTIME:
		if (sev == CPER_SEV_NON_FATAL_CORRECTED)
			hdr->notify_type = CMC;
		else
			hdr->notify_type = MCE;
		break;
	default:
		dev_err(adev->dev, "Unknown CPER Type\n");
		break;
	}

	__inc_entry_length(hdr, HDR_LEN);
}

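/*
 * Fill a section descriptor: revision, offset and length of the section body,
 * severity, section type GUID and an "OAM<socket>" FRU text, plus the
 * bad-page-threshold and poison flag bits when requested.
 */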
static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
					       struct cper_sec_desc *section_desc,
					       bool bp_threshold,
					       bool poison,
					       enum cper_error_severity sev,
					       guid_t sec_type,
					       uint32_t section_length,
					       uint32_t section_offset)
{
	section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
	section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
	section_desc->sec_offset = section_offset;
	section_desc->sec_length = section_length;
	section_desc->valid_bits.fru_text = 1;
	section_desc->flag_bits.primary = 1;
	section_desc->severity = sev;
	section_desc->sec_type = sec_type;

	snprintf(section_desc->fru_text, 20, "OAM%d",
		 (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			 adev->smuio.funcs->get_socket_id(adev) :
			 0);

	if (bp_threshold)
		section_desc->flag_bits.exceed_err_threshold = 1;
	if (poison)
		section_desc->flag_bits.latent_err = 1;

	return 0;
}

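/*
 * Fill one crashdump (fatal) section at index @idx with the raw ACA register
 * data captured for an uncorrectable error.
 */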
int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
					 struct cper_hdr *hdr,
					 uint32_t idx,
					 struct cper_sec_crashdump_reg_data reg_data)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_crashdump_fatal *section;

	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
		  FATAL_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
					    CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
					    FATAL_SEC_OFFSET(hdr->sec_cnt, idx));

	section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->body.reg_arr_size = sizeof(reg_data);
	section->body.data = reg_data;

	__inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);

	return 0;
}

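/*
 * Fill one non-standard (runtime) error section at index @idx with up to
 * CPER_ACA_REG_COUNT register values from an ACA bank dump.
 */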
int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
					   struct cper_hdr *hdr,
					   uint32_t idx,
					   enum cper_error_severity sev,
					   uint32_t *reg_dump,
					   uint32_t reg_count)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_nonstd_err *section;
	bool poison;

	poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
		  NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
					    sev, RUNTIME, NONSTD_SEC_LEN,
					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	reg_count = umin(reg_count, CPER_ACA_REG_COUNT);

	section->hdr.valid_bits.err_info_cnt = 1;
	section->hdr.valid_bits.err_context_cnt = 1;

	section->info.error_type = RUNTIME;
	section->info.ms_chk_bits.err_type_valid = 1;
	section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);

	memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));

	__inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);

	return 0;
}

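/*
 * Fill a non-standard error section reporting that the bad page threshold
 * has been exceeded; the register dump is hardcoded apart from the socket ID
 * encoded into the IPID registers.
 */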
int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
						      struct cper_hdr *hdr,
						      uint32_t idx)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_nonstd_err *section;
	uint32_t socket_id;

	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
		  NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
					    CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	section->hdr.valid_bits.err_info_cnt = 1;
	section->hdr.valid_bits.err_context_cnt = 1;

	section->info.error_type = RUNTIME;
	section->info.valid_bits.ms_chk = 1;
	section->info.ms_chk_bits.err_type_valid = 1;
	section->info.ms_chk_bits.err_type = 1;
	section->info.ms_chk_bits.pcc = 1;
	section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);

	/* Hardcoded Reg dump for bad page threshold CPER */
	socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			    adev->smuio.funcs->get_socket_id(adev) :
			    0;
	section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
	section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
	section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
	section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
	section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
	section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
	section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
	section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;

	__inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);

	return 0;
}

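/*
 * Allocate a zeroed CPER record large enough for @section_count sections of
 * the given type (header + section descriptors + section bodies).
 */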
struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
					 enum amdgpu_cper_type type,
					 uint16_t section_count)
{
	struct cper_hdr *hdr;
	uint32_t size = 0;

	size += HDR_LEN;
	size += (SEC_DESC_LEN * section_count);

	switch (type) {
	case AMDGPU_CPER_TYPE_RUNTIME:
	case AMDGPU_CPER_TYPE_BP_THRESHOLD:
		size += (NONSTD_SEC_LEN * section_count);
		break;
	case AMDGPU_CPER_TYPE_FATAL:
		size += (FATAL_SEC_LEN * section_count);
		break;
	case AMDGPU_CPER_TYPE_BOOT:
		size += (BOOT_SEC_LEN * section_count);
		break;
	default:
		dev_err(adev->dev, "Unknown CPER Type!\n");
		return NULL;
	}

	hdr = kzalloc(size, GFP_KERNEL);
	if (!hdr)
		return NULL;

	/* Save this early */
	hdr->sec_cnt = section_count;

	return hdr;
}

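/*
 * Build a fatal CPER record for an uncorrectable error from a single ACA
 * bank and write it to the CPER ring buffer.
 */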
int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
				   struct aca_bank *bank)
{
	struct cper_hdr *fatal = NULL;
	struct cper_sec_crashdump_reg_data reg_data = { 0 };
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	int ret;

	fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
	if (!fatal) {
		dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
		return -ENOMEM;
	}

	reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
	reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
	reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
	reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
	reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
	reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
	reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
	reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);

	amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
	ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
	if (ret)
		return ret;

	amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
	kfree(fatal);

	return 0;
}

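/*
 * Build a fatal CPER record reporting that the bad page threshold has been
 * exceeded and write it to the CPER ring buffer.
 */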
int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
{
	struct cper_hdr *bp_threshold = NULL;
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	int ret;

	bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
	if (!bp_threshold) {
		dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
		return -ENOMEM;
	}

	amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
				   AMDGPU_CPER_TYPE_BP_THRESHOLD,
				   CPER_SEV_FATAL);
	ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
	if (ret)
		return ret;

	amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
	kfree(bp_threshold);

	return 0;
}

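/* Map an ACA error type to the corresponding CPER severity. */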
static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
								enum aca_error_type aca_err_type)
{
	switch (aca_err_type) {
	case ACA_ERROR_TYPE_UE:
		return CPER_SEV_FATAL;
	case ACA_ERROR_TYPE_CE:
		return CPER_SEV_NON_FATAL_CORRECTED;
	case ACA_ERROR_TYPE_DEFERRED:
		return CPER_SEV_NON_FATAL_UNCORRECTED;
	default:
		dev_err(adev->dev, "Unknown ACA error type!\n");
		return CPER_SEV_FATAL;
	}
}

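/*
 * Build one runtime CPER record covering all corrected/deferred errors in
 * the given ACA bank list (one section per bank) and write it to the CPER
 * ring buffer. The record severity is raised to "uncorrected" if any
 * deferred error is present.
 */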
int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
				    struct aca_banks *banks,
				    uint16_t bank_count)
{
	struct cper_hdr *corrected = NULL;
	enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
	struct aca_bank_node *node;
	struct aca_bank *bank;
	uint32_t i = 0;
	int ret;

	corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
	if (!corrected) {
		dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
		return -ENOMEM;
	}

	/* Raise severity if any DE is detected in the ACA bank list */
	list_for_each_entry(node, &banks->list, node) {
		bank = &node->bank;
		if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
			sev = CPER_SEV_NON_FATAL_UNCORRECTED;
			break;
		}
	}

	amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);

	/* Combine CE and DE in cper record */
	list_for_each_entry(node, &banks->list, node) {
		bank = &node->bank;
		reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
		reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
		reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
		reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
		reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
		reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
		reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
		reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
		reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
		reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
		reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
		reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
		reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
		reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);

		ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
							     amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
							     reg_data, CPER_ACA_REG_COUNT);
		if (ret)
			return ret;
	}

	amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
	kfree(corrected);

	return 0;
}

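/* Return true if the ring buffer at @pos starts with a CPER record header. */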
static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
{
	struct cper_hdr *chdr;

	chdr = (struct cper_hdr *)&(ring->ring[pos]);
	return strcmp(chdr->signature, "CPER") ? false : true;
}

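/*
 * Return the size in bytes of the entry at @pos: the length of the CPER
 * record starting there, or the distance to the next record header, clamped
 * to the bytes remaining before the ring buffer wraps.
 */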
static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
{
	struct cper_hdr *chdr;
	u64 p;
	u32 chunk, rec_len = 0;

	chdr = (struct cper_hdr *)&(ring->ring[pos]);
	chunk = ring->ring_size - (pos << 2);

	if (!strcmp(chdr->signature, "CPER")) {
		rec_len = chdr->record_length;
		goto calc;
	}

	/* ring buffer is not full, no cper data after ring->wptr */
	if (ring->count_dw)
		goto calc;

	for (p = pos + 1; p <= ring->buf_mask; p++) {
		chdr = (struct cper_hdr *)&(ring->ring[p]);
		if (!strcmp(chdr->signature, "CPER")) {
			rec_len = (p - pos) << 2;
			goto calc;
		}
	}

calc:
	if (!rec_len)
		return chunk;
	else
		return umin(rec_len, chunk);
}

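/*
 * Copy a CPER record into the ring buffer at the current write pointer,
 * wrapping as needed. If the write overruns unread data, the read pointer
 * is advanced to the next record header so readers always start on a
 * record boundary.
 */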
void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
{
	u64 pos, wptr_old, rptr;
	int rec_cnt_dw = count >> 2;
	u32 chunk, ent_sz;
	u8 *s = (u8 *)src;

	if (count >= ring->ring_size - 4) {
		dev_err(ring->adev->dev,
			"CPER data size(%d) is larger than ring size(%d)\n",
			count, ring->ring_size - 4);

		return;
	}

	mutex_lock(&ring->adev->cper.ring_lock);

	wptr_old = ring->wptr;
	rptr = *ring->rptr_cpu_addr & ring->ptr_mask;

	while (count) {
		ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
		chunk = umin(ent_sz, count);

		memcpy(&ring->ring[ring->wptr], s, chunk);

		ring->wptr += (chunk >> 2);
		ring->wptr &= ring->ptr_mask;
		count -= chunk;
		s += chunk;
	}

	if (ring->count_dw < rec_cnt_dw)
		ring->count_dw = 0;

	/* the buffer has overflowed, adjust rptr */
	if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
	    ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
	    ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
		pos = (ring->wptr + 1) & ring->ptr_mask;

		do {
			ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);

			rptr += (ent_sz >> 2);
			rptr &= ring->ptr_mask;
			*ring->rptr_cpu_addr = rptr;

			pos = rptr;
		} while (!amdgpu_cper_is_hdr(ring, rptr));
	}

	if (ring->count_dw >= rec_cnt_dw)
		ring->count_dw -= rec_cnt_dw;
	mutex_unlock(&ring->adev->cper.ring_lock);
}

static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *(ring->rptr_cpu_addr);
}

static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
{
	return ring->wptr;
}

static const struct amdgpu_ring_funcs cper_ring_funcs = {
	.type = AMDGPU_RING_TYPE_CPER,
	.align_mask = 0xff,
	.support_64bit_ptrs = false,
	.get_rptr = amdgpu_cper_ring_get_rptr,
	.get_wptr = amdgpu_cper_ring_get_wptr,
};

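/* Create the software ring buffer used to expose CPER records. */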
static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &(adev->cper.ring_buf);

	mutex_init(&adev->cper.ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = false;
	ring->no_scheduler = true;
	ring->funcs = &cper_ring_funcs;

	sprintf(ring->name, "cper");
	return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

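/*
 * Initialize CPER support: set up the CPER ring buffer and bookkeeping.
 * Skipped when neither ACA nor SR-IOV RAS CPER is enabled.
 */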
int amdgpu_cper_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
		return 0;

	r = amdgpu_cper_ring_init(adev);
	if (r) {
		dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r);
		return r;
	}

	mutex_init(&adev->cper.cper_lock);

	adev->cper.enabled = true;
	adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;

	return 0;
}

int amdgpu_cper_fini(struct amdgpu_device *adev)
{
	if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
		return 0;

	adev->cper.enabled = false;

	amdgpu_ring_fini(&(adev->cper.ring_buf));
	adev->cper.count = 0;
	adev->cper.wptr = 0;

	return 0;
}