// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel client SoC with integrated memory controller using IBECC
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
 * regions of the physical memory space. It is used by memory controllers
 * that don't support out-of-band ECC, which often needs an additional
 * storage device on each channel for storing ECC data.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/edac.h>
#include <linux/bits.h>
#include <linux/bitfield.h>
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>

#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION "v2.5.1"

#define EDAC_MOD_STR "igen6_edac"
#define IGEN6_NMI_NAME "igen6_ibecc"

/* Debug macros */
#define igen6_printk(level, fmt, arg...) \
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
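/* e.g. GET_BITFIELD(0x00f0, 4, 7) == 0xf: bits [7:4] masked and shifted down */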

#define NUM_IMC 2 /* Max memory controllers */
#define NUM_CHANNELS 2 /* Max channels */
#define NUM_DIMMS 2 /* Max DIMMs per channel */

#define _4GB BIT_ULL(32)

/* Size of physical memory */
#define TOM_OFFSET 0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET 0xbc
/* Capability register C */
#define CAPID_C_OFFSET 0xec
#define CAPID_C_IBECC BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET 0xf0
#define CAPID_E_IBECC BIT(12)
#define CAPID_E_IBECC_BIT18 BIT(18)

/* Error Status */
#define ERRSTS_OFFSET 0xc8
#define ERRSTS_CE BIT_ULL(6)
#define ERRSTS_UE BIT_ULL(7)

/* Error Command */
#define ERRCMD_OFFSET 0xca
#define ERRCMD_CE BIT_ULL(6)
#define ERRCMD_UE BIT_ULL(7)

/* IBECC MMIO base address */
#define IBECC_BASE (res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET IBECC_BASE
#define IBECC_ACTIVATE_EN BIT(0)

/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE BIT_ULL(62)
#define ECC_ERROR_LOG_UE BIT_ULL(63)
#define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET 0x48
#define MCHBAR_EN BIT_ULL(0)
#define MCHBAR_SIZE 0x10000

/* Parameters for the channel decode stage */
#define IMC_BASE (res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)

static struct res_config {
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	/* Host MMIO configuration */
	u64 reg_mchbar_mask;
	/* Top of memory */
	u64 reg_tom_mask;
	/* Top of upper usable DRAM */
	u64 reg_touud_mask;
	/* IBECC error log */
	u64 reg_eccerrlog_addr_mask;
	u32 imc_base;
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	u32 ibecc_base;
	u32 ibecc_error_log_offset;
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;

struct igen6_imc {
	int mc;
	struct mem_ctl_info *mci;
	struct pci_dev *pdev;
	struct device dev;
	void __iomem *window;
	u64 size;
	u64 ch_s_size;
	int ch_l_map;
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};

static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	u64 ms_hash;
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;

/* The top of low usable DRAM */
static u32 igen6_tolud;
/* The size of physical memory */
static u64 igen6_tom;

struct decoded_addr {
	int mc;
	u64 imc_addr;
	u64 sys_addr;
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};

struct ecclog_node {
	struct llist_node llnode;
	int mc;
	u64 ecclog;
};

/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory for the IBECC error logs and links the logs onto
 * the lock-less list. printk() and the reporting of the errors to the
 * EDAC core are deferred to a worker.
 */
#define ECCLOG_POOL_SIZE PAGE_SIZE
static LLIST_HEAD(ecclog_llist);
static struct gen_pool *ecclog_pool;
static char ecclog_buf[ECCLOG_POOL_SIZE];
static struct irq_work ecclog_irq_work;
static struct work_struct ecclog_work;
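
/*
 * Summary of the reporting path implemented below (derived from this
 * file's code, for orientation only):
 *   NMI/MCE handler -> ecclog_read_and_clear() -> ecclog_gen_pool_add()
 *   -> irq_work_queue(&ecclog_irq_work) -> ecclog_irq_work_cb() clears
 *   ERRSTS and schedules ecclog_work -> ecclog_work_cb() decodes each
 *   logged error and reports it via edac_mc_handle_error().
 */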

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5 0x4514
#define DID_EHL_SKU6 0x4528
#define DID_EHL_SKU7 0x452a
#define DID_EHL_SKU8 0x4516
#define DID_EHL_SKU9 0x452c
#define DID_EHL_SKU10 0x452e
#define DID_EHL_SKU11 0x4532
#define DID_EHL_SKU12 0x4518
#define DID_EHL_SKU13 0x451a
#define DID_EHL_SKU14 0x4534
#define DID_EHL_SKU15 0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8 0x4581
#define DID_ICL_SKU10 0x4585
#define DID_ICL_SKU11 0x4589
#define DID_ICL_SKU12 0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU 0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1 0x4601
#define DID_ADL_SKU2 0x4602
#define DID_ADL_SKU3 0x4621
#define DID_ADL_SKU4 0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1 0x4614
#define DID_ADL_N_SKU2 0x4617
#define DID_ADL_N_SKU3 0x461b
#define DID_ADL_N_SKU4 0x461c
#define DID_ADL_N_SKU5 0x4673
#define DID_ADL_N_SKU6 0x4674
#define DID_ADL_N_SKU7 0x4675
#define DID_ADL_N_SKU8 0x4677
#define DID_ADL_N_SKU9 0x4678
#define DID_ADL_N_SKU10 0x4679
#define DID_ADL_N_SKU11 0x467c
#define DID_ADL_N_SKU12 0x4632

/* Compute die IDs for Arizona Beach with IBECC */
#define DID_AZB_SKU1 0x4676

/* Compute die IDs for Amston Lake with IBECC */
#define DID_ASL_SKU1 0x464a
#define DID_ASL_SKU2 0x4646
#define DID_ASL_SKU3 0x4652

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
#define DID_RPL_P_SKU2 0xa707
#define DID_RPL_P_SKU3 0xa708
#define DID_RPL_P_SKU4 0xa716
#define DID_RPL_P_SKU5 0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1 0x7d21
#define DID_MTL_PS_SKU2 0x7d22
#define DID_MTL_PS_SKU3 0x7d23
#define DID_MTL_PS_SKU4 0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1 0x7d01
#define DID_MTL_P_SKU2 0x7d02
#define DID_MTL_P_SKU3 0x7d14

/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1 0x7d06
#define DID_ARL_UH_SKU2 0x7d20
#define DID_ARL_UH_SKU3 0x7d30

/* Compute die IDs for Panther Lake-H with IBECC */
#define DID_PTL_H_SKU1 0xb000
#define DID_PTL_H_SKU2 0xb001
#define DID_PTL_H_SKU3 0xb002
#define DID_PTL_H_SKU4 0xb003
#define DID_PTL_H_SKU5 0xb004
#define DID_PTL_H_SKU6 0xb005
#define DID_PTL_H_SKU7 0xb008
#define DID_PTL_H_SKU8 0xb011
#define DID_PTL_H_SKU9 0xb014
#define DID_PTL_H_SKU10 0xb015
#define DID_PTL_H_SKU11 0xb028
#define DID_PTL_H_SKU12 0xb029
#define DID_PTL_H_SKU13 0xb02a

/* Compute die IDs for Wildcat Lake with IBECC */
#define DID_WCL_SKU1 0xfd00

static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} u;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	if (!(u.v & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = u.v & res_cfg->reg_mchbar_mask;
	edac_dbg(2, "MCHBAR 0x%llx (reg 0x%llx)\n", *mchbar, u.v);

	return 0;
}
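
/*
 * Illustrative example (register value assumed, not from real hardware):
 * with the EHL mask GENMASK_ULL(38, 16), a raw MCHBAR value of 0xfedc0001
 * has MCHBAR_EN set and decodes to the base address 0xfedc0000.
 */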

static bool ehl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !!(CAPID_C_IBECC & v);
}

static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	if (eaddr < igen6_tolud)
		return eaddr;

	if (igen6_tom <= _4GB)
		return eaddr + igen6_tolud - _4GB;

	if (eaddr >= igen6_tom)
		return eaddr + igen6_tolud - igen6_tom;

	return eaddr;
}
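
/*
 * Worked example for the remapping above (TOLUD/TOM values assumed):
 * with TOLUD = 2 GiB and TOM = 6 GiB, the DRAM displaced by the PCI MMIO
 * hole is reclaimed above 4 GiB, so a logged system address of
 * 0x180000000 (6 GiB) maps back to IMC address 6 GiB + 2 GiB - 6 GiB =
 * 0x80000000.
 */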

static bool icl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !(CAPID_C_IBECC & v) &&
		(boot_cpu_data.x86_stepping >= 1);
}

static bool tgl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC & v);
}

static bool mtl_p_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC_BIT18 & v);
}

static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
{
#define MCHBAR_MEMSS_IBECCDIS 0x13c00
	void __iomem *window;
	u64 mchbar;
	u32 val;

	if (get_mchbar(pdev, &mchbar))
		return false;

	window = ioremap(mchbar, MCHBAR_SIZE * 2);
	if (!window) {
		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
		return false;
	}

	val = readl(window + MCHBAR_MEMSS_IBECCDIS);
	iounmap(window);

	/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
	return !GET_BITFIELD(val, 6, 6);
}

static u64 mem_addr_to_sys_addr(u64 maddr)
{
	if (maddr < igen6_tolud)
		return maddr;

	if (igen6_tom <= _4GB)
		return maddr - igen6_tolud + _4GB;

	if (maddr < _4GB)
		return maddr - igen6_tolud + igen6_tom;

	return maddr;
}

static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = hash_init;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return hash ^ intlv;
}
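
/*
 * The hash above is a parity computation: it XORs together the masked
 * address bits [19:6], the seed hash_init, and the address's original
 * interleave bit, yielding the single slice-select bit for that address.
 */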

static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 maddr, hash, mask, ms_s_size;
	int intlv_bit;
	u32 ms_hash;

	ms_s_size = igen6_pvt->ms_s_size;
	if (eaddr >= ms_s_size)
		return eaddr + ms_s_size;

	ms_hash = igen6_pvt->ms_hash;

	mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
		GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	hash = mem_slice_hash(maddr, mask, mc, intlv_bit);

	return maddr | (hash << intlv_bit);
}

static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);

	return mem_addr_to_sys_addr(maddr);
}

static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}

static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * ms_s_size)
		return eaddr - ms_s_size;

	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);

	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
		GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return imc_addr;
}
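
/*
 * The bit manipulation above squeezes out the MC-interleave bit: e.g.
 * with intlv_bit == 6, eaddr 0xff (0b11111111) becomes 0x7f, because
 * bit 6 only selected the memory controller and carries no IMC-local
 * offset.
 */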

static u64 rpl_p_err_addr(u64 ecclog)
{
	return field_get(res_cfg->reg_eccerrlog_addr_mask, ecclog);
}

static struct res_config ehl_cfg = {
	.num_imc = 1,
	.reg_mchbar_mask = GENMASK_ULL(38, 16),
	.reg_tom_mask = GENMASK_ULL(38, 20),
	.reg_touud_mask = GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0x5000,
	.ibecc_base = 0xdc00,
	.ibecc_available = ehl_ibecc_available,
	.ibecc_error_log_offset = 0x170,
	.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};

static struct res_config icl_cfg = {
	.num_imc = 1,
	.reg_mchbar_mask = GENMASK_ULL(38, 16),
	.reg_tom_mask = GENMASK_ULL(38, 20),
	.reg_touud_mask = GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0x5000,
	.ibecc_base = 0xd800,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = icl_ibecc_available,
	.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};

static struct res_config tgl_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.reg_mchbar_mask = GENMASK_ULL(38, 17),
	.reg_tom_mask = GENMASK_ULL(38, 20),
	.reg_touud_mask = GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0x5000,
	.cmf_base = 0x11000,
	.cmf_size = 0x800,
	.ms_hash_offset = 0xac,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
};

static struct res_config adl_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(45, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct res_config adl_n_cfg = {
	.machine_check = true,
	.num_imc = 1,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(45, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct res_config rpl_p_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(45, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr = rpl_p_err_addr,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct res_config mtl_ps_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = mtl_ps_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct res_config mtl_p_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = mtl_p_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct res_config wcl_cfg = {
	.machine_check = true,
	.num_imc = 1,
	.reg_mchbar_mask = GENMASK_ULL(41, 17),
	.reg_tom_mask = GENMASK_ULL(41, 20),
	.reg_touud_mask = GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask = GENMASK_ULL(38, 5),
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = mtl_p_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};

static struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU4), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU5), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU6), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU7), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU8), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU9), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU10), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU11), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU12), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU13), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);

static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
		MAD_DIMM_CH_DSW(mad_dimm);

	switch (w) {
	case 0:
		return DEV_X8;
	case 1:
		return DEV_X16;
	case 2:
		return DEV_X32;
	default:
		return DEV_UNKNOWN;
	}
}

static enum mem_type get_memory_type(u32 mad_inter)
{
	u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);

	switch (t) {
	case 0:
		return MEM_DDR4;
	case 1:
		return MEM_DDR3;
	case 2:
		return MEM_LPDDR3;
	case 3:
		return MEM_LPDDR4;
	case 4:
		return MEM_WIO2;
	default:
		return MEM_UNKNOWN;
	}
}

static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = 0;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return (int)hash ^ intlv;
}

static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 channel_addr;

	/* Remove the interleave bit and shift upper part down to fill gap */
	channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
	channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);

	return channel_addr;
}
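
/*
 * decode_chan_idx() is the same parity hash as mem_slice_hash() seeded
 * with 0, and decode_channel_addr() is the same interleave-bit removal
 * used in adl_err_addr_to_imc_addr(), applied here at the channel level.
 */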

static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	if (addr > 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}

static int igen6_decode(struct decoded_addr *res)
{
	struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
	u64 addr = res->imc_addr, sub_addr, s_size;
	int idx, l_map;
	u32 hash;

	if (addr >= igen6_tom) {
		edac_dbg(0, "Address 0x%llx out of range\n", addr);
		return -EINVAL;
	}

	/* Decode channel */
	hash = readl(imc->window + CHANNEL_HASH_OFFSET);
	s_size = imc->ch_s_size;
	l_map = imc->ch_l_map;
	decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
	res->channel_idx = idx;
	res->channel_addr = sub_addr;

	/* Decode sub-channel/DIMM */
	hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
	s_size = imc->dimm_s_size[idx];
	l_map = imc->dimm_l_map[idx];
	decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
	res->sub_channel_idx = idx;
	res->sub_channel_addr = sub_addr;

	return 0;
}
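
/*
 * The decode is two applications of the same scheme: the IMC address is
 * split into (channel, channel address), then the channel address into
 * (DIMM, DIMM address). Addresses beyond twice the smaller unit's size
 * fall in the non-interleaved tail of the larger unit; everything below
 * that is either hash-interleaved or interleaved on bit 6.
 */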

static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
					 HW_EVENT_ERR_UNCORRECTED :
					 HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1,
			     res->sys_addr >> PAGE_SHIFT,
			     res->sys_addr & ~PAGE_MASK,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}

static struct gen_pool *ecclog_gen_pool_create(void)
{
	struct gen_pool *pool;

	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
	if (!pool)
		return NULL;

	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
		gen_pool_destroy(pool);
		return NULL;
	}

	return pool;
}

static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	struct ecclog_node *node;

	node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
	if (!node)
		return -ENOMEM;

	node->mc = mc;
	node->ecclog = ecclog;
	llist_add(&node->llnode, &ecclog_llist);

	return 0;
}

/*
 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
 * configuration space status register ERRSTS can indicate whether a
 * correctable error or an uncorrectable error occurred. We only use the
 * ECC_ERROR_LOG register to check the error type, but need to clear both
 * registers to enable future error events.
 */
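/*
 * ECC_ERROR_LOG layout, as used by the macros above: bit 63 = UE,
 * bit 62 = CE, bits [61:46] = syndrome, and the error address in the
 * bits selected by res_cfg->reg_eccerrlog_addr_mask (e.g. bits [38:5]
 * on EHL).
 */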
static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);

	/*
	 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
	 * the invalid value ~0. This will result in a flood of invalid
	 * error reports in polling mode. Skip it.
	 */
	if (ecclog == ~0)
		return 0;

	/* Neither a CE nor a UE. Skip it. */
	if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
		return 0;

	/* Clear CE/UE bits by writing 1s */
	writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);

	return ecclog;
}

static void errsts_clear(struct igen6_imc *imc)
{
	u16 errsts;

	if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
		igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
		return;
	}

	/* Clear CE/UE bits by writing 1s */
	if (errsts & (ERRSTS_CE | ERRSTS_UE))
		pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
}

static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	if (enable)
		errcmd |= ERRCMD_CE | ERRCMD_UE;
	else
		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}

static int ecclog_handler(void)
{
	struct igen6_imc *imc;
	int i, n = 0;
	u64 ecclog;

	for (i = 0; i < res_cfg->num_imc; i++) {
		imc = &igen6_pvt->imc[i];

		/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */

		ecclog = ecclog_read_and_clear(imc);
		if (!ecclog)
			continue;

		if (!ecclog_gen_pool_add(i, ecclog))
			irq_work_queue(&ecclog_irq_work);

		n++;
	}

	return n;
}

static void ecclog_work_cb(struct work_struct *work)
{
	struct ecclog_node *node, *tmp;
	struct mem_ctl_info *mci;
	struct llist_node *head;
	struct decoded_addr res;
	u64 eaddr;

	head = llist_del_all(&ecclog_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(node, tmp, head, llnode) {
		memset(&res, 0, sizeof(res));
		if (res_cfg->err_addr)
			eaddr = res_cfg->err_addr(node->ecclog);
		else
			eaddr = node->ecclog & res_cfg->reg_eccerrlog_addr_mask;

		res.mc = node->mc;
		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);

		mci = igen6_pvt->imc[res.mc].mci;

		edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
		igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
		igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);

		if (!igen6_decode(&res))
			igen6_output_error(&res, mci, node->ecclog);

		gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
	}
}

static void ecclog_irq_work_cb(struct irq_work *irq_work)
{
	int i;

	for (i = 0; i < res_cfg->num_imc; i++)
		errsts_clear(&igen6_pvt->imc[i]);

	if (!llist_empty(&ecclog_llist))
		schedule_work(&ecclog_work);
}

static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	unsigned char reason;

	if (!ecclog_handler())
		return NMI_DONE;

	/*
	 * Both In-Band ECC correctable errors and uncorrectable errors are
	 * reported by the SERR# NMI. The generic NMI code (see pci_serr_error())
	 * doesn't clear the NMI_REASON_CLEAR_SERR bit (in port 0x61) to
	 * re-enable the SERR# NMI after NMI handling. So clear this bit here
	 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
	 */
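	/*
	 * On port 0x61, writing a value with NMI_REASON_CLEAR_SERR set masks
	 * and clears SERR#; writing it again with the bit cleared re-arms
	 * SERR# reporting. That is why the bit is toggled on and then off
	 * below.
	 */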
	reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
	reason |= NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
	reason &= ~NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);

	return NMI_HANDLED;
}

static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *mce = (struct mce *)data;
	char *type;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	/*
	 * Ignore unless this is a memory related error.
	 * We don't check the MCI_STATUS_ADDRV bit of MCi_STATUS here,
	 * since it isn't set on some CPUs (e.g., Tiger Lake UP3).
	 */
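	/*
	 * Note on the check below (interpretation per the architectural
	 * MCACOD encoding; not stated in the original code): (status &
	 * 0xefff) >> 7 == 1 matches the memory-controller error signature
	 * 0000_0000_1MMM_CCCC (MMM = transaction type, CCCC = channel).
	 * Bit 12 is masked out since some CPUs use it for corrected-error
	 * filtering.
	 */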
	if ((mce->status & 0xefff) >> 7 != 1)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
		 mce->extcpu, type, mce->mcgstatus,
		 mce->bank, mce->status);
	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
		 mce->cpuvendor, mce->cpuid, mce->time,
		 mce->socketid, mce->apicid);
	/*
	 * We just use the Machine Check for the memory error notification.
	 * Each memory controller is associated with an IBECC instance.
	 * Directly read and clear the error information (error address and
	 * error type) on all the IBECC instances so that we know on which
	 * memory controller the memory error(s) occurred.
	 */
	if (!ecclog_handler())
		return NOTIFY_DONE;

	mce->kflags |= MCE_HANDLED_EDAC;

	return NOTIFY_DONE;
}

static struct notifier_block ecclog_mce_dec = {
	.notifier_call = ecclog_mce_handler,
	.priority = MCE_PRIO_EDAC,
};

static bool igen6_check_ecc(struct igen6_imc *imc)
{
	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);

	return !!(activate & IBECC_ACTIVATE_EN);
}

static int igen6_get_dimm_config(struct mem_ctl_info *mci)
{
	struct igen6_imc *imc = mci->pvt_info;
	u32 mad_inter, mad_intra, mad_dimm;
	int i, j, ndimms, mc = imc->mc;
	struct dimm_info *dimm;
	enum mem_type mtype;
	enum dev_type dtype;
	u64 dsize;
	bool ecc;

	edac_dbg(2, "\n");

	mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
	mtype = get_memory_type(mad_inter);
	ecc = igen6_check_ecc(imc);
	imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
	imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);

	for (i = 0; i < NUM_CHANNELS; i++) {
		mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
		mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);

		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
		imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
		imc->size += imc->dimm_s_size[i];
		imc->size += imc->dimm_l_size[i];
		ndimms = 0;

		for (j = 0; j < NUM_DIMMS; j++) {
			dimm = edac_get_dimm(mci, i, j, 0);

			if (j ^ imc->dimm_l_map[i]) {
				dtype = get_width(0, mad_dimm);
				dsize = imc->dimm_s_size[i];
			} else {
				dtype = get_width(1, mad_dimm);
				dsize = imc->dimm_l_size[i];
			}

			if (!dsize)
				continue;

			dimm->grain = 64;
			dimm->mtype = mtype;
			dimm->dtype = dtype;
			dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
			dimm->edac_mode = EDAC_SECDED;
			snprintf(dimm->label, sizeof(dimm->label),
				 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
			edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
				 mc, i, j, dsize >> 20, dimm->nr_pages);

			ndimms++;
		}

		if (ndimms && !ecc) {
			igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
			return -ENODEV;
		}
	}

	edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);

	return 0;
}

#ifdef CONFIG_EDAC_DEBUG
/* Top of upper usable DRAM */
static u64 igen6_touud;
#define TOUUD_OFFSET 0xa8

static void igen6_reg_dump(struct igen6_imc *imc)
{
	int i;

	edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
		 readl(imc->window + CHANNEL_HASH_OFFSET));
	edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
		 readl(imc->window + CHANNEL_EHASH_OFFSET));
	edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
		 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
	edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
		 readq(imc->window + ECC_ERROR_LOG_OFFSET));

	for (i = 0; i < NUM_CHANNELS; i++) {
		edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
			 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
		edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
			 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
	}
	edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
	edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
	edac_dbg(2, "TOM : 0x%llx", igen6_tom);
}

static struct dentry *igen6_test;

static int debugfs_u64_set(void *data, u64 val)
{
	u64 ecclog;

	if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
		edac_dbg(0, "Address 0x%llx out of range\n", val);
		return 0;
	}

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	ecclog = (val & res_cfg->reg_eccerrlog_addr_mask) | ECC_ERROR_LOG_CE;

	if (!ecclog_gen_pool_add(0, ecclog))
		irq_work_queue(&ecclog_irq_work);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
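
/*
 * Usage sketch (path assumed, with debugfs mounted at /sys/kernel/debug):
 *   echo 0x10000000 > /sys/kernel/debug/edac/igen6_test/addr
 * injects a fake correctable error at that system address and drives it
 * through the same irq_work/worker reporting path as a real error.
 */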

static void igen6_debug_setup(void)
{
	igen6_test = edac_debugfs_create_dir("igen6_test");
	if (!igen6_test)
		return;

	if (!edac_debugfs_create_file("addr", 0200, igen6_test,
				      NULL, &fops_u64_wo)) {
		debugfs_remove(igen6_test);
		igen6_test = NULL;
	}
}

static void igen6_debug_teardown(void)
{
	debugfs_remove_recursive(igen6_test);
}
#else
static void igen6_reg_dump(struct igen6_imc *imc) {}
static void igen6_debug_setup(void) {}
static void igen6_debug_teardown(void) {}
#endif

static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} u;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		goto fail;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		goto fail;
	}

	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		goto fail;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		goto fail;
	}

	igen6_tom = u.v & res_cfg->reg_tom_mask;

	if (get_mchbar(pdev, mchbar))
		goto fail;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
		edac_dbg(2, "Failed to read lower TOUUD\n");
	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi))
		edac_dbg(2, "Failed to read upper TOUUD\n");
	else
		igen6_touud = u.v & res_cfg->reg_touud_mask;
#endif

	return 0;
fail:
	return -ENODEV;
}

static void igen6_check(struct mem_ctl_info *mci)
{
	struct igen6_imc *imc = mci->pvt_info;
	u64 ecclog;

	/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
	ecclog = ecclog_read_and_clear(imc);
	if (!ecclog)
		return;

	if (!ecclog_gen_pool_add(imc->mc, ecclog))
		irq_work_queue(&ecclog_irq_work);
}

/* Check whether the memory controller is absent. */
static bool igen6_imc_absent(void __iomem *window)
{
	return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
}

static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[2];
	struct mem_ctl_info *mci;
	struct igen6_imc *imc;
	int rc;

	edac_dbg(2, "\n");

	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = NUM_DIMMS;
	layers[1].is_virt_csrow = true;

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci) {
		rc = -ENOMEM;
		goto fail;
	}

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto fail2;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;
	mci->pvt_info = &igen6_pvt->imc[mc];

	imc = mci->pvt_info;
	device_initialize(&imc->dev);
	/*
	 * The EDAC core uses mci->pdev (a pointer to a struct device) as
	 * the memory controller ID. The client SoCs attach one or more
	 * memory controllers to a single pci_dev (a single pci_dev->dev
	 * can serve multiple memory controllers).
	 *
	 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
	 * for the first memory controller and assign a unique imc->dev
	 * to mci->pdev for each non-first memory controller.
	 */
	mci->pdev = mc ? &imc->dev : &pdev->dev;
	imc->mc = mc;
	imc->pdev = pdev;
	imc->window = window;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto fail3;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto fail3;
	}

	imc->mci = mci;
	return 0;
fail3:
	put_device(&imc->dev);
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
fail2:
	edac_mc_free(mci);
fail:
	return rc;
}

static void igen6_unregister_mcis(void)
{
	struct mem_ctl_info *mci;
	struct igen6_imc *imc;
	int i;

	edac_dbg(2, "\n");

	for (i = 0; i < res_cfg->num_imc; i++) {
		imc = &igen6_pvt->imc[i];
		mci = imc->mci;
		if (!mci)
			continue;

		edac_mc_del_mc(mci->pdev);
		kfree(mci->ctl_name);
		mci->pvt_info = NULL;
		edac_mc_free(mci);
		put_device(&imc->dev);
		iounmap(imc->window);
	}
}

static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
	void __iomem *window;
	int lmc, pmc, rc;
	u64 base;

	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
		base = mchbar + pmc * MCHBAR_SIZE;
		window = ioremap(base, MCHBAR_SIZE);
		if (!window) {
			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
			rc = -ENOMEM;
			goto out_unregister_mcis;
		}

		if (igen6_imc_absent(window)) {
			iounmap(window);
			edac_dbg(2, "Skip absent mc%d\n", pmc);
			continue;
		}

		rc = igen6_register_mci(lmc, window, pdev);
		if (rc)
			goto out_iounmap;

		/* Done, if all present MCs are detected and registered. */
		if (++lmc >= res_cfg->num_imc)
			break;
	}

	if (!lmc) {
		igen6_printk(KERN_ERR, "No mc found.\n");
		return -ENODEV;
	}

	if (lmc < res_cfg->num_imc) {
		igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.",
			     res_cfg->num_imc, lmc);
		res_cfg->num_imc = lmc;
	}

	return 0;

out_iounmap:
	iounmap(window);

out_unregister_mcis:
	igen6_unregister_mcis();

	return rc;
}

static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u64 base = mchbar + res_cfg->cmf_base;
	u32 offset = res_cfg->ms_hash_offset;
	u32 size = res_cfg->cmf_size;
	u64 ms_s_size, ms_hash;
	void __iomem *cmf;
	int ms_l_map;

	edac_dbg(2, "\n");

	if (imc[0].size < imc[1].size) {
		ms_s_size = imc[0].size;
		ms_l_map = 1;
	} else {
		ms_s_size = imc[1].size;
		ms_l_map = 0;
	}

	igen6_pvt->ms_s_size = ms_s_size;
	igen6_pvt->ms_l_map = ms_l_map;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 ms_s_size >> 20, ms_l_map);

	if (!size)
		return 0;

	cmf = ioremap(base, size);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
		return -ENODEV;
	}

	ms_hash = readq(cmf + offset);
	igen6_pvt->ms_hash = ms_hash;

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);

	iounmap(cmf);

	return 0;
}

static int register_err_handler(void)
{
	int rc;

	if (res_cfg->machine_check) {
		mce_register_decode_chain(&ecclog_mce_dec);
		return 0;
	}

	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
				  0, IGEN6_NMI_NAME);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
		return rc;
	}

	return 0;
}

static void unregister_err_handler(void)
{
	if (res_cfg->machine_check) {
		mce_unregister_decode_chain(&ecclog_mce_dec);
		return;
	}

	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}

static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
{
	/*
	 * Quirk: Certain SoCs' error reporting interrupts don't work.
	 * Force polling mode for them to ensure that memory error
	 * events can be handled.
	 */
	if (ent->device == DID_ADL_N_SKU4) {
		edac_op_state = EDAC_OPSTATE_POLL;
		return;
	}

	/* Set the mode according to the configuration data. */
	if (cfg->machine_check)
		edac_op_state = EDAC_OPSTATE_INT;
	else
		edac_op_state = EDAC_OPSTATE_NMI;
}

static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	u64 mchbar;
	int rc;

	edac_dbg(2, "\n");

	igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
	if (!igen6_pvt)
		return -ENOMEM;

	res_cfg = (struct res_config *)ent->driver_data;

	rc = igen6_pci_setup(pdev, &mchbar);
	if (rc)
		goto fail;

	opstate_set(res_cfg, ent);

	rc = igen6_register_mcis(pdev, mchbar);
	if (rc)
		goto fail;

	if (res_cfg->num_imc > 1) {
		rc = igen6_mem_slice_setup(mchbar);
		if (rc)
			goto fail2;
	}

	ecclog_pool = ecclog_gen_pool_create();
	if (!ecclog_pool) {
		rc = -ENOMEM;
		goto fail2;
	}

	INIT_WORK(&ecclog_work, ecclog_work_cb);
	init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);

	rc = register_err_handler();
	if (rc)
		goto fail3;

	/* Enable error reporting */
	rc = errcmd_enable_error_reporting(true);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
		goto fail4;
	}

	/* Check for errors pending from before/during the registration of the error handler */
	ecclog_handler();

	igen6_debug_setup();
	return 0;
fail4:
	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
fail3:
	gen_pool_destroy(ecclog_pool);
fail2:
	igen6_unregister_mcis();
fail:
	kfree(igen6_pvt);
	return rc;
}

static void igen6_remove(struct pci_dev *pdev)
{
	edac_dbg(2, "\n");

	igen6_debug_teardown();
	errcmd_enable_error_reporting(false);
	unregister_err_handler();
	irq_work_sync(&ecclog_irq_work);
	flush_work(&ecclog_work);
	gen_pool_destroy(ecclog_pool);
	igen6_unregister_mcis();
	kfree(igen6_pvt);
}

static struct pci_driver igen6_driver = {
	.name = EDAC_MOD_STR,
	.probe = igen6_probe,
	.remove = igen6_remove,
	.id_table = igen6_pci_tbl,
};

static int __init igen6_init(void)
{
	const char *owner;
	int rc;

	edac_dbg(2, "\n");

	if (ghes_get_devices())
		return -EBUSY;

	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	rc = pci_register_driver(&igen6_driver);
	if (rc)
		return rc;

	igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);

	return 0;
}

static void __exit igen6_exit(void)
{
	edac_dbg(2, "\n");

	pci_unregister_driver(&igen6_driver);
}

module_init(igen6_init);
module_exit(igen6_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Qiuxu Zhuo");
MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");