GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/habanalabs/gaudi/gaudi.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
	gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};

static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
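
/* Classify a queue's role in collective operations: external queues act as
 * collective masters, the DMA5/TPC7/NIC queues act as collective slaves and
 * all other queues do not support collective mode.
 */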
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

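/* Default power limits depend on the card type: PMC cards use the PMC maximum
 * and a DC default that differs when FW security is enabled, while PCI cards
 * use the PCI defaults.
 */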
static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

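/* Populate the fixed ASIC properties: per-queue properties, SRAM/DRAM ranges,
 * MMU layout, sync-stream SOB/monitor reservations and various firmware and
 * power related defaults.
 */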
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

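/* Map the SRAM, CFG and HBM PCI BARs (HBM is mapped write-combined) and
 * derive the rmmio pointer used for register access.
 */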
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

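/* Re-point the HBM BAR (inbound region 2) to a new device address.
 * Returns the previous address, or U64_MAX if the iATU is owned by FW or the
 * region update failed.
 */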
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

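/* Configure the PCI iATU inbound regions (SRAM+CFG, SPI flash, HBM) and the
 * outbound region towards host memory, unless FW already did the setup.
 */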
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

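/* Early initialization: set fixed properties, sanity-check BAR sizes, detect
 * whether FW configured the iATU, init PCI, read the preboot status and reset
 * the device if its H/W state is dirty.
 */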
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

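/* Build a LIN_DMA packet that copies the TPC kernel image from host memory to
 * SRAM, submit it on QMAN0 and then run the kernel on every TPC engine.
 */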
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

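/* Assign a SOB from the stream's current SOB group to each collective slave
 * queue (all NIC queues plus the DMA5/TPC7 reduction queues) of the stream.
 */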
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
			BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

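/* Initialize the collective properties: carve the reserved SOBs into groups,
 * reset them in H/W, map SOBs per stream and set the master SOB mask.
 */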
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

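/* Generate the two wait CBs of the collective master job, each armed on a
 * different monitor and SOB range of the slave SOB group.
 */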
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

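/* Prepare a collective wait CS: take the SOB information from the signal CS
 * (or the encapsulated signals handle), grab the needed krefs, generate the
 * wait/signal CBs of the master and slave jobs, and handle SOB group value
 * wraparound.
 */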
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the
		 * jobs of the master/slaves; the sob_value will be taken
		 * later on in gaudi_collective_slave_init_job, depending on
		 * each job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

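/* Create the jobs of a collective wait CS: one master job on the wait queue
 * and a slave job per enabled NIC plus the DMA5/TPC7 reduction engine.
 */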
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525
struct hl_ctx *ctx, struct hl_cs *cs,
1526
u32 wait_queue_id, u32 collective_engine_id,
1527
u32 encaps_signal_offset)
1528
{
1529
struct gaudi_device *gaudi = hdev->asic_specific;
1530
struct hw_queue_properties *hw_queue_prop;
1531
u32 queue_id, collective_queue, num_jobs;
1532
u32 stream, nic_queue, nic_idx = 0;
1533
bool skip;
1534
int i, rc = 0;
1535
1536
/* Verify wait queue id is configured as master */
1537
hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538
if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539
dev_err(hdev->dev,
1540
"Queue %d is not configured as collective master\n",
1541
wait_queue_id);
1542
return -EINVAL;
1543
}
1544
1545
/* Verify engine id is supported */
1546
if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547
collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548
dev_err(hdev->dev,
1549
"Collective wait does not support engine %u\n",
1550
collective_engine_id);
1551
return -EINVAL;
1552
}
1553
1554
stream = wait_queue_id % 4;
1555
1556
if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557
collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558
else
1559
collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560
1561
num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562
nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563
1564
/* First job goes to the collective master queue, it will wait for
1565
* the collective slave queues to finish execution.
1566
* The synchronization is done using two monitors:
1567
* First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568
* reduction engine (DMA5/TPC7).
1569
*
1570
* Rest of the jobs goes to the collective slave queues which will
1571
* all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572
*/
1573
for (i = 0 ; i < num_jobs ; i++) {
1574
if (i == 0) {
1575
queue_id = wait_queue_id;
1576
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577
HL_COLLECTIVE_MASTER, queue_id,
1578
wait_queue_id, encaps_signal_offset);
1579
} else {
1580
if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581
if (gaudi->hw_cap_initialized &
1582
BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583
skip = false;
1584
else
1585
skip = true;
1586
1587
queue_id = nic_queue;
1588
nic_queue += 4;
1589
nic_idx++;
1590
1591
if (skip)
1592
continue;
1593
} else {
1594
queue_id = collective_queue;
1595
}
1596
1597
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598
HL_COLLECTIVE_SLAVE, queue_id,
1599
wait_queue_id, encaps_signal_offset);
1600
}
1601
1602
if (rc)
1603
return rc;
1604
}
1605
1606
return rc;
1607
}
1608
1609
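/* Late initialization, run once communication with the device CPU (CPU-CP) is
 * up: fetch the CPU-CP info, trim the first two NIC ports on PCI cards, enable
 * PCI access, scrub SRAM and DRAM, fetch the PSOC frequency, clear the MMU
 * page tables, initialize the TPC memories, set up the collective
 * infrastructure, prepare the MMU for the single user ASID (1) and set the
 * PLL profile.
 */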
static int gaudi_late_init(struct hl_device *hdev)
1610
{
1611
struct gaudi_device *gaudi = hdev->asic_specific;
1612
int rc;
1613
1614
rc = gaudi->cpucp_info_get(hdev);
1615
if (rc) {
1616
dev_err(hdev->dev, "Failed to get cpucp info\n");
1617
return rc;
1618
}
1619
1620
if ((hdev->card_type == cpucp_card_type_pci) &&
1621
(hdev->nic_ports_mask & 0x3)) {
1622
dev_info(hdev->dev,
1623
"PCI card detected, only 8 ports are enabled\n");
1624
hdev->nic_ports_mask &= ~0x3;
1625
1626
/* Stop and disable unused NIC QMANs */
1627
WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1628
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1629
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1630
1631
WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1632
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1633
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1634
1635
WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1636
WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1637
1638
gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1639
}
1640
1641
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1642
if (rc)
1643
return rc;
1644
1645
/* Scrub both SRAM and DRAM */
1646
rc = hdev->asic_funcs->scrub_device_mem(hdev);
1647
if (rc)
1648
goto disable_pci_access;
1649
1650
rc = gaudi_fetch_psoc_frequency(hdev);
1651
if (rc) {
1652
dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1653
goto disable_pci_access;
1654
}
1655
1656
rc = gaudi_mmu_clear_pgt_range(hdev);
1657
if (rc) {
1658
dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1659
goto disable_pci_access;
1660
}
1661
1662
rc = gaudi_init_tpc_mem(hdev);
1663
if (rc) {
1664
dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1665
goto disable_pci_access;
1666
}
1667
1668
rc = gaudi_collective_init(hdev);
1669
if (rc) {
1670
dev_err(hdev->dev, "Failed to init collective\n");
1671
goto disable_pci_access;
1672
}
1673
1674
/* We only support a single ASID for the user, so for the sake of optimization, just
 * initialize the ASID one time during device initialization with the fixed value of 1.
 */
1677
gaudi_mmu_prepare(hdev, 1);
1678
1679
hl_fw_set_pll_profile(hdev);
1680
1681
return 0;
1682
1683
disable_pci_access:
1684
hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1685
1686
return rc;
1687
}
1688
1689
static void gaudi_late_fini(struct hl_device *hdev)
1690
{
1691
hl_hwmon_release_resources(hdev);
1692
}
1693
1694
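/* Allocate the host memory region that the device CPU can access. The
 * allocation is retried up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until a
 * range is found whose MSB extension bits (bits 49:39) are identical across
 * the whole range, since a single HW register holds those bits; rejected
 * attempts are freed before returning.
 */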
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1695
{
1696
dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1697
void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1698
int i, j, rc = 0;
1699
1700
/*
 * The device CPU works with 40-bit addresses, while bit 39 must be set
 * to '1' when accessing the host.
 * Bits 49:39 of the full host address are saved for a later
 * configuration of the HW to perform extension to 50 bits.
 * Because there is a single HW register that holds the extension bits,
 * these bits must be identical across the entire allocated range.
 */
1708
1709
for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1710
virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1711
&dma_addr_arr[i],
1712
GFP_KERNEL | __GFP_ZERO);
1713
if (!virt_addr_arr[i]) {
1714
rc = -ENOMEM;
1715
goto free_dma_mem_arr;
1716
}
1717
1718
end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1719
if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1720
GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1721
break;
1722
}
1723
1724
if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1725
dev_err(hdev->dev,
"MSB of CPU accessible DMA memory is not identical across the entire range\n");
1727
rc = -EFAULT;
1728
goto free_dma_mem_arr;
1729
}
1730
1731
hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1732
hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1733
hdev->cpu_pci_msb_addr =
1734
GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1735
1736
if (!hdev->asic_prop.fw_security_enabled)
1737
GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1738
1739
free_dma_mem_arr:
1740
for (j = 0 ; j < i ; j++)
1741
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1742
dma_addr_arr[j]);
1743
1744
return rc;
1745
}
1746
1747
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1748
{
1749
struct gaudi_device *gaudi = hdev->asic_specific;
1750
struct gaudi_internal_qman_info *q;
1751
u32 i;
1752
1753
for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1754
q = &gaudi->internal_qmans[i];
1755
if (!q->pq_kernel_addr)
1756
continue;
1757
hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1758
}
1759
}
1760
1761
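/* Allocate the PQ buffers for all internal queues (HBM DMA, MME, TPC and NIC
 * QMANs). The PQ size is chosen per engine type; on failure, everything
 * allocated so far is freed.
 */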
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1762
{
1763
struct gaudi_device *gaudi = hdev->asic_specific;
1764
struct gaudi_internal_qman_info *q;
1765
int rc, i;
1766
1767
for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1768
if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1769
continue;
1770
1771
q = &gaudi->internal_qmans[i];
1772
1773
switch (i) {
1774
case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1775
q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1776
break;
1777
case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1778
q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1779
break;
1780
case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1781
q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1782
break;
1783
case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1784
q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1785
break;
1786
default:
1787
dev_err(hdev->dev, "Bad internal queue index %d", i);
1788
rc = -EINVAL;
1789
goto free_internal_qmans_pq_mem;
1790
}
1791
1792
q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1793
GFP_KERNEL | __GFP_ZERO);
1794
if (!q->pq_kernel_addr) {
1795
rc = -ENOMEM;
1796
goto free_internal_qmans_pq_mem;
1797
}
1798
}
1799
1800
return 0;
1801
1802
free_internal_qmans_pq_mem:
1803
gaudi_free_internal_qmans_pq_mem(hdev);
1804
return rc;
1805
}
1806
1807
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1808
{
1809
struct asic_fixed_properties *prop = &hdev->asic_prop;
1810
struct pci_mem_region *region;
1811
1812
/* CFG */
1813
region = &hdev->pci_mem_region[PCI_REGION_CFG];
1814
region->region_base = CFG_BASE;
1815
region->region_size = CFG_SIZE;
1816
region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1817
region->bar_size = CFG_BAR_SIZE;
1818
region->bar_id = CFG_BAR_ID;
1819
region->used = 1;
1820
1821
/* SRAM */
1822
region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1823
region->region_base = SRAM_BASE_ADDR;
1824
region->region_size = SRAM_SIZE;
1825
region->offset_in_bar = 0;
1826
region->bar_size = SRAM_BAR_SIZE;
1827
region->bar_id = SRAM_BAR_ID;
1828
region->used = 1;
1829
1830
/* DRAM */
1831
region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1832
region->region_base = DRAM_PHYS_BASE;
1833
region->region_size = hdev->asic_prop.dram_size;
1834
region->offset_in_bar = 0;
1835
region->bar_size = prop->dram_pci_bar_size;
1836
region->bar_id = HBM_BAR_ID;
1837
region->used = 1;
1838
1839
/* SP SRAM */
1840
region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1841
region->region_base = PSOC_SCRATCHPAD_ADDR;
1842
region->region_size = PSOC_SCRATCHPAD_SIZE;
1843
region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1844
region->bar_size = CFG_BAR_SIZE;
1845
region->bar_id = CFG_BAR_ID;
1846
region->used = 1;
1847
}
1848
1849
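/* Software-only initialization: build the event map from the IRQ table,
 * create the DMA pool for small allocations, the CPU-accessible DMA region
 * and its gen_pool, and the internal QMAN PQ buffers, then advertise the
 * capabilities this ASIC supports (sync stream, coresight, staged submission,
 * wait for multi-CS) and the PCI memory regions.
 */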
static int gaudi_sw_init(struct hl_device *hdev)
1850
{
1851
struct gaudi_device *gaudi;
1852
u32 i, event_id = 0;
1853
int rc;
1854
1855
/* Allocate device structure */
1856
gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1857
if (!gaudi)
1858
return -ENOMEM;
1859
1860
for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1861
if (gaudi_irq_map_table[i].valid) {
1862
if (event_id == GAUDI_EVENT_SIZE) {
1863
dev_err(hdev->dev,
1864
"Event array exceeds the limit of %u events\n",
1865
GAUDI_EVENT_SIZE);
1866
rc = -EINVAL;
1867
goto free_gaudi_device;
1868
}
1869
1870
gaudi->events[event_id++] =
1871
gaudi_irq_map_table[i].fc_id;
1872
}
1873
}
1874
1875
gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1876
1877
hdev->asic_specific = gaudi;
1878
1879
/* Create DMA pool for small allocations */
1880
hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1881
&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1882
if (!hdev->dma_pool) {
1883
dev_err(hdev->dev, "failed to create DMA pool\n");
1884
rc = -ENOMEM;
1885
goto free_gaudi_device;
1886
}
1887
1888
rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1889
if (rc)
1890
goto free_dma_pool;
1891
1892
hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1893
if (!hdev->cpu_accessible_dma_pool) {
1894
dev_err(hdev->dev,
1895
"Failed to create CPU accessible DMA pool\n");
1896
rc = -ENOMEM;
1897
goto free_cpu_dma_mem;
1898
}
1899
1900
rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1901
(uintptr_t) hdev->cpu_accessible_dma_mem,
1902
HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1903
if (rc) {
1904
dev_err(hdev->dev,
1905
"Failed to add memory to CPU accessible DMA pool\n");
1906
rc = -EFAULT;
1907
goto free_cpu_accessible_dma_pool;
1908
}
1909
1910
rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1911
if (rc)
1912
goto free_cpu_accessible_dma_pool;
1913
1914
spin_lock_init(&gaudi->hw_queues_lock);
1915
1916
hdev->supports_sync_stream = true;
1917
hdev->supports_coresight = true;
1918
hdev->supports_staged_submission = true;
1919
hdev->supports_wait_for_multi_cs = true;
1920
1921
hdev->asic_funcs->set_pci_memory_regions(hdev);
1922
hdev->stream_master_qid_arr =
1923
hdev->asic_funcs->get_stream_master_qid_arr();
1924
hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1925
1926
return 0;
1927
1928
free_cpu_accessible_dma_pool:
1929
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1930
free_cpu_dma_mem:
1931
if (!hdev->asic_prop.fw_security_enabled)
1932
GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1933
hdev->cpu_pci_msb_addr);
1934
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1935
hdev->cpu_accessible_dma_address);
1936
free_dma_pool:
1937
dma_pool_destroy(hdev->dma_pool);
1938
free_gaudi_device:
1939
kfree(gaudi);
1940
return rc;
1941
}
1942
1943
static int gaudi_sw_fini(struct hl_device *hdev)
1944
{
1945
struct gaudi_device *gaudi = hdev->asic_specific;
1946
1947
gaudi_free_internal_qmans_pq_mem(hdev);
1948
1949
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1950
1951
if (!hdev->asic_prop.fw_security_enabled)
1952
GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1953
hdev->cpu_pci_msb_addr);
1954
1955
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1956
hdev->cpu_accessible_dma_address);
1957
1958
dma_pool_destroy(hdev->dma_pool);
1959
1960
kfree(gaudi);
1961
1962
return 0;
1963
}
1964
1965
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1966
{
1967
struct hl_device *hdev = arg;
1968
int i;
1969
1970
if (hdev->disabled)
1971
return IRQ_HANDLED;
1972
1973
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1974
hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1975
1976
hl_irq_handler_eq(irq, &hdev->event_queue);
1977
1978
return IRQ_HANDLED;
1979
}
1980
1981
/*
1982
* For backward compatibility, new MSI interrupts should be set after the
1983
* existing CPU and NIC interrupts.
1984
*/
1985
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1986
bool cpu_eq)
1987
{
1988
int msi_vec;
1989
1990
if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1991
dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1992
GAUDI_EVENT_QUEUE_MSI_IDX);
1993
1994
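/* Vectors below the CPU EQ index (and the CPU EQ itself) map 1:1 to nr;
 * any other vector is shifted past the NIC IRQs and the CPU EQ
 * (NIC_NUMBER_OF_ENGINES + 1) so that new MSIs don't collide with them.
 */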
msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1995
(nr + NIC_NUMBER_OF_ENGINES + 1);
1996
1997
return pci_irq_vector(hdev->pdev, msi_vec);
1998
}
1999
2000
static int gaudi_enable_msi_single(struct hl_device *hdev)
2001
{
2002
int rc, irq;
2003
2004
dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2005
2006
irq = gaudi_pci_irq_vector(hdev, 0, false);
2007
rc = request_irq(irq, gaudi_irq_handler_single, 0,
2008
"gaudi single msi", hdev);
2009
if (rc)
2010
dev_err(hdev->dev,
2011
"Failed to request single MSI IRQ\n");
2012
2013
return rc;
2014
}
2015
2016
static int gaudi_enable_msi(struct hl_device *hdev)
2017
{
2018
struct gaudi_device *gaudi = hdev->asic_specific;
2019
int rc;
2020
2021
if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2022
return 0;
2023
2024
rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2025
if (rc < 0) {
2026
dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2027
return rc;
2028
}
2029
2030
rc = gaudi_enable_msi_single(hdev);
2031
if (rc)
2032
goto free_pci_irq_vectors;
2033
2034
gaudi->hw_cap_initialized |= HW_CAP_MSI;
2035
2036
return 0;
2037
2038
free_pci_irq_vectors:
2039
pci_free_irq_vectors(hdev->pdev);
2040
return rc;
2041
}
2042
2043
static void gaudi_sync_irqs(struct hl_device *hdev)
2044
{
2045
struct gaudi_device *gaudi = hdev->asic_specific;
2046
2047
if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2048
return;
2049
2050
/* Wait for all pending IRQs to finish */
2051
synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2052
}
2053
2054
static void gaudi_disable_msi(struct hl_device *hdev)
2055
{
2056
struct gaudi_device *gaudi = hdev->asic_specific;
2057
2058
if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2059
return;
2060
2061
gaudi_sync_irqs(hdev);
2062
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2063
pci_free_irq_vectors(hdev->pdev);
2064
2065
gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2066
}
2067
2068
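/* Enable the SRAM scrambler in all NIF/SIF routers and DMA interfaces.
 * Skipped when the firmware is secured, when the firmware reports the SRAM
 * scrambler as already enabled, or when the driver already did it
 * (HW_CAP_SRAM_SCRAMBLER is set).
 */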
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2069
{
2070
struct gaudi_device *gaudi = hdev->asic_specific;
2071
2072
if (hdev->asic_prop.fw_security_enabled)
2073
return;
2074
2075
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2076
CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2077
return;
2078
2079
if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2080
return;
2081
2082
WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2083
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2084
WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2085
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086
WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2087
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088
WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2089
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090
WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2091
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092
WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2093
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094
WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2095
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096
WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2097
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098
2099
WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2100
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101
WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2102
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103
WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2104
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105
WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2106
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2107
WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2108
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2109
WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2110
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2111
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2112
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2113
WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2114
1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2115
2116
WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2117
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2118
WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2119
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2120
WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2121
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2122
WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2123
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2124
WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2125
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2126
WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2127
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2128
WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2129
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2130
WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2131
1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2132
2133
gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2134
}
2135
2136
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2137
{
2138
struct gaudi_device *gaudi = hdev->asic_specific;
2139
2140
if (hdev->asic_prop.fw_security_enabled)
2141
return;
2142
2143
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2144
CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2145
return;
2146
2147
if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2148
return;
2149
2150
WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2151
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152
WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2153
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154
WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2155
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2156
WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2157
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2158
WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2159
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2160
WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2161
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2162
WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2163
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2164
WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2165
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2166
2167
WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2168
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169
WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2170
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171
WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2172
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173
WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2174
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175
WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2176
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177
WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2178
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179
WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2180
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181
WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2182
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183
2184
WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2185
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2186
WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2187
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188
WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2189
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2190
WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2191
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2192
WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2193
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2194
WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2195
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2196
WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2197
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2198
WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2199
1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2200
2201
gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2202
}
2203
2204
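/* Program the end-to-end credits (HBM and PCI read/write sizes) in the
 * SIF/NIF routers and DMA interfaces and then enable E2E. Skipped when the
 * firmware is secured or has already configured these credits.
 */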
static void gaudi_init_e2e(struct hl_device *hdev)
2205
{
2206
if (hdev->asic_prop.fw_security_enabled)
2207
return;
2208
2209
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2210
CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2211
return;
2212
2213
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2214
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2215
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2216
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2217
2218
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2219
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2220
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2221
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2222
2223
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2224
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2225
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2226
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2227
2228
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2229
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2230
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2231
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2232
2233
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2234
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2235
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2236
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2237
2238
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2239
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2240
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2241
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2242
2243
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2244
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2245
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2246
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2247
2248
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2249
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2250
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2251
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2252
2253
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2254
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2255
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2256
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2257
2258
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2259
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2260
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2261
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2262
2263
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2264
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2265
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2266
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2267
2268
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2269
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2270
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2271
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2272
2273
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2274
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2275
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2276
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2277
2278
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2279
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2280
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2281
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2282
2283
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2284
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2285
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2286
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2287
2288
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2289
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2290
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2291
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2292
2293
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2294
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2295
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2296
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2297
2298
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2299
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2300
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2301
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2302
2303
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2304
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2305
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2306
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2307
2308
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2309
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2310
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2311
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2312
2313
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2314
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2315
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2316
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2317
2318
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2319
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2320
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2321
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2322
2323
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2324
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2325
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2326
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2327
2328
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2329
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2330
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2331
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2332
2333
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2334
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2335
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2336
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2337
2338
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2339
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2340
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2341
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2342
2343
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2344
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2345
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2346
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2347
2348
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2349
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2350
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2351
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2352
2353
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2354
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2355
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2356
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2357
2358
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2359
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2360
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2361
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2362
2363
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2364
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2365
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2366
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2367
2368
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2369
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2370
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2371
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2372
2373
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2374
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2375
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2376
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2377
2378
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2379
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2380
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2381
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2382
2383
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2384
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2385
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2386
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2387
2388
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2389
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2390
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2391
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2392
2393
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2394
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2396
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397
2398
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2399
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2401
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402
2403
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2404
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2406
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407
2408
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2409
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2411
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412
2413
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2414
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2415
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2416
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2417
2418
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2419
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2420
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2421
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2422
2423
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2424
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2425
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2426
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2427
2428
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2429
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2430
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2431
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2432
2433
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2434
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2435
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2436
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2437
2438
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2439
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2440
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2441
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2442
2443
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2444
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2445
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2446
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2447
2448
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2449
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2450
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2451
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2452
}
2453
2454
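/* Program the HBM read/write credit counters of the four DMA interfaces and
 * then enable the read and write credits. Skipped when the firmware is
 * secured or has already configured the HBM credits.
 */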
static void gaudi_init_hbm_cred(struct hl_device *hdev)
2455
{
2456
u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2457
2458
if (hdev->asic_prop.fw_security_enabled)
2459
return;
2460
2461
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2462
CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2463
return;
2464
2465
hbm0_wr = 0x33333333;
2466
hbm0_rd = 0x77777777;
2467
hbm1_wr = 0x55555555;
2468
hbm1_rd = 0xDDDDDDDD;
2469
2470
WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2471
WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2472
WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2473
WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2474
2475
WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2476
WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2477
WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2478
WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2479
2480
WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2481
WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2482
WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2483
WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2484
2485
WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2486
WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2487
WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2488
WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2489
2490
WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2491
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2494
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496
WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2497
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2500
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502
2503
WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2504
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2507
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509
WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2510
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512
WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2513
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515
}
2516
2517
static void gaudi_init_golden_registers(struct hl_device *hdev)
2518
{
2519
u32 tpc_offset;
2520
int tpc_id, i;
2521
2522
gaudi_init_e2e(hdev);
2523
gaudi_init_hbm_cred(hdev);
2524
2525
for (tpc_id = 0, tpc_offset = 0;
2526
tpc_id < TPC_NUMBER_OF_ENGINES;
2527
tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2528
/* Mask all arithmetic interrupts from TPC */
2529
WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2530
/* Set 16 cache lines */
2531
WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2532
ICACHE_FETCH_LINE_NUM, 2);
2533
}
2534
2535
/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2536
for (i = 0 ; i < 128 ; i += 8)
2537
writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2538
2539
WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540
WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541
WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542
WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543
}
2544
2545
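/* Configure a single stream (qman_id) of a PCI DMA QMAN: PQ base and size,
 * LDMA offsets, sync manager message base addresses and, once per QMAN
 * (qman_id 0), the RAZWI/error reporting and the arbitration watchdog.
 */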
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2546
int qman_id, dma_addr_t qman_pq_addr)
2547
{
2548
struct cpu_dyn_regs *dyn_regs =
2549
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2550
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2551
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2552
u32 q_off, dma_qm_offset;
2553
u32 dma_qm_err_cfg, irq_handler_offset;
2554
2555
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2556
2557
mtr_base_en_lo = lower_32_bits(CFG_BASE +
2558
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2559
mtr_base_en_hi = upper_32_bits(CFG_BASE +
2560
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561
so_base_en_lo = lower_32_bits(CFG_BASE +
2562
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2563
so_base_en_hi = upper_32_bits(CFG_BASE +
2564
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2566
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2567
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2568
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569
so_base_ws_lo = lower_32_bits(CFG_BASE +
2570
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2571
so_base_ws_hi = upper_32_bits(CFG_BASE +
2572
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573
2574
q_off = dma_qm_offset + qman_id * 4;
2575
2576
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2577
WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2578
2579
WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2580
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2581
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2582
2583
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2584
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2585
QMAN_LDMA_SRC_OFFSET);
2586
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2587
QMAN_LDMA_DST_OFFSET);
2588
2589
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2590
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2591
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2592
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2593
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2594
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2595
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2596
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2597
2598
WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2599
2600
/* The following configuration is needed only once per QMAN */
2601
if (qman_id == 0) {
2602
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2603
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2604
le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2605
2606
/* Configure RAZWI IRQ */
2607
dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2608
if (hdev->stop_on_err)
2609
dma_qm_err_cfg |=
2610
PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2611
2612
WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2613
2614
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2615
lower_32_bits(CFG_BASE + irq_handler_offset));
2616
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2617
upper_32_bits(CFG_BASE + irq_handler_offset));
2618
2619
WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2620
gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2621
dma_id);
2622
2623
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2624
QM_ARB_ERR_MSG_EN_MASK);
2625
2626
/* Set timeout to maximum */
2627
WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2628
2629
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630
QMAN_EXTERNAL_MAKE_TRUSTED);
2631
2632
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2633
}
2634
}
2635
2636
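/* Configure a DMA core engine: outstanding read limits (including the
 * H3-2116 workaround), error reporting towards the host/GIC and MMU bypass
 * for secured channels, then enable the engine.
 */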
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2637
{
2638
struct cpu_dyn_regs *dyn_regs =
2639
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640
u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642
u32 irq_handler_offset;
2643
2644
/* Set to maximum possible according to physical size */
2645
WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646
WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2647
2648
/* WA for H/W bug H3-2116 */
2649
WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2650
2651
/* The STOP_ON bit implies that the operation gets no completion in case of RAZWI */
2652
if (hdev->stop_on_err)
2653
dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2654
2655
WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2656
2657
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659
le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2660
2661
WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662
lower_32_bits(CFG_BASE + irq_handler_offset));
2663
WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664
upper_32_bits(CFG_BASE + irq_handler_offset));
2665
2666
WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667
gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668
WREG32(mmDMA0_CORE_PROT + dma_offset,
2669
1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670
/* If the channel is secured, it should be in MMU bypass mode */
2671
WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672
1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673
WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2674
}
2675
2676
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677
u32 enable_mask)
2678
{
2679
u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680
2681
WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682
}
2683
2684
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685
{
2686
struct gaudi_device *gaudi = hdev->asic_specific;
2687
struct hl_hw_queue *q;
2688
int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689
2690
if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691
return;
2692
2693
for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694
dma_id = gaudi_dma_assignment[i];
2695
/*
 * For queues after the CPU Q, we need to add 1 to get the correct
 * queue index. In addition, we need to add the CPU EQ and NIC IRQs in
 * order to get the correct MSI register.
 */
2700
if (dma_id > 1) {
2701
cpu_skip = 1;
2702
nic_skip = NIC_NUMBER_OF_ENGINES;
2703
} else {
2704
cpu_skip = 0;
2705
nic_skip = 0;
2706
}
2707
2708
for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709
q_idx = 4 * dma_id + j + cpu_skip;
2710
q = &hdev->kernel_queues[q_idx];
2711
q->cq_id = cq_id++;
2712
q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713
gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714
q->bus_address);
2715
}
2716
2717
gaudi_init_dma_core(hdev, dma_id);
2718
2719
gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720
}
2721
2722
gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723
}
2724
2725
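/* Configure a single stream of an HBM DMA QMAN. For streams 0-3 (upper CP)
 * the PQ base and CPDMA offsets are set; for the lower CP (qman_id 4) the
 * LDMA offsets, RAZWI/error reporting and arbitration watchdog are set
 * instead.
 */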
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726
int qman_id, u64 qman_base_addr)
2727
{
2728
struct cpu_dyn_regs *dyn_regs =
2729
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732
u32 dma_qm_err_cfg, irq_handler_offset;
2733
u32 q_off, dma_qm_offset;
2734
2735
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2736
2737
mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739
mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741
so_base_en_lo = lower_32_bits(CFG_BASE +
2742
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743
so_base_en_hi = upper_32_bits(CFG_BASE +
2744
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749
so_base_ws_lo = lower_32_bits(CFG_BASE +
2750
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
so_base_ws_hi = upper_32_bits(CFG_BASE +
2752
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753
2754
q_off = dma_qm_offset + qman_id * 4;
2755
2756
if (qman_id < 4) {
2757
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758
lower_32_bits(qman_base_addr));
2759
WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760
upper_32_bits(qman_base_addr));
2761
2762
WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2765
2766
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767
QMAN_CPDMA_SIZE_OFFSET);
2768
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769
QMAN_CPDMA_SRC_OFFSET);
2770
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771
QMAN_CPDMA_DST_OFFSET);
2772
} else {
2773
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775
le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2776
2777
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778
QMAN_LDMA_SIZE_OFFSET);
2779
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780
QMAN_LDMA_SRC_OFFSET);
2781
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782
QMAN_LDMA_DST_OFFSET);
2783
2784
/* Configure RAZWI IRQ */
2785
dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786
if (hdev->stop_on_err)
2787
dma_qm_err_cfg |=
2788
HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789
2790
WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791
2792
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793
lower_32_bits(CFG_BASE + irq_handler_offset));
2794
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795
upper_32_bits(CFG_BASE + irq_handler_offset));
2796
2797
WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798
gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799
dma_id);
2800
2801
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802
QM_ARB_ERR_MSG_EN_MASK);
2803
2804
/* Set timeout to maximum */
2805
WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2806
2807
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2808
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2809
QMAN_INTERNAL_MAKE_TRUSTED);
2810
}
2811
2812
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2813
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2814
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2815
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2816
2817
/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2818
if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2819
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2820
mtr_base_ws_lo);
2821
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2822
mtr_base_ws_hi);
2823
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2824
so_base_ws_lo);
2825
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2826
so_base_ws_hi);
2827
}
2828
}
2829
2830
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2831
{
2832
struct gaudi_device *gaudi = hdev->asic_specific;
2833
struct gaudi_internal_qman_info *q;
2834
u64 qman_base_addr;
2835
int i, j, dma_id, internal_q_index;
2836
2837
if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2838
return;
2839
2840
for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2841
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2842
2843
for (j = 0 ; j < QMAN_STREAMS ; j++) {
2844
/*
 * Add the CPU queue in order to get the correct queue
 * number, as all internal queues are placed after it.
 */
2848
internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2849
2850
q = &gaudi->internal_qmans[internal_q_index];
2851
qman_base_addr = (u64) q->pq_dma_addr;
2852
gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2853
qman_base_addr);
2854
}
2855
2856
/* Initializing lower CP for HBM DMA QMAN */
2857
gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2858
2859
gaudi_init_dma_core(hdev, dma_id);
2860
2861
gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2862
}
2863
2864
gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2865
}
2866
2867
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2868
int qman_id, u64 qman_base_addr)
2869
{
2870
struct cpu_dyn_regs *dyn_regs =
2871
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2872
u32 mtr_base_lo, mtr_base_hi;
2873
u32 so_base_lo, so_base_hi;
2874
u32 irq_handler_offset;
2875
u32 q_off, mme_id;
2876
u32 mme_qm_err_cfg;
2877
2878
mtr_base_lo = lower_32_bits(CFG_BASE +
2879
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2880
mtr_base_hi = upper_32_bits(CFG_BASE +
2881
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882
so_base_lo = lower_32_bits(CFG_BASE +
2883
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2884
so_base_hi = upper_32_bits(CFG_BASE +
2885
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886
2887
q_off = mme_offset + qman_id * 4;
2888
2889
if (qman_id < 4) {
2890
WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2891
lower_32_bits(qman_base_addr));
2892
WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2893
upper_32_bits(qman_base_addr));
2894
2895
WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2896
WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2897
WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2898
2899
WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2900
QMAN_CPDMA_SIZE_OFFSET);
2901
WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2902
QMAN_CPDMA_SRC_OFFSET);
2903
WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2904
QMAN_CPDMA_DST_OFFSET);
2905
} else {
2906
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2907
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2908
le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2909
2910
WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2911
QMAN_LDMA_SIZE_OFFSET);
2912
WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2913
QMAN_LDMA_SRC_OFFSET);
2914
WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2915
QMAN_LDMA_DST_OFFSET);
2916
2917
/* Configure RAZWI IRQ */
2918
mme_id = mme_offset /
2919
(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2920
2921
mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2922
if (hdev->stop_on_err)
2923
mme_qm_err_cfg |=
2924
MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2925
2926
WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2927
2928
WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2929
lower_32_bits(CFG_BASE + irq_handler_offset));
2930
WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2931
upper_32_bits(CFG_BASE + irq_handler_offset));
2932
2933
WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2934
gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2935
mme_id);
2936
2937
WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2938
QM_ARB_ERR_MSG_EN_MASK);
2939
2940
/* Set timeout to maximum */
2941
WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2942
2943
WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944
WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945
QMAN_INTERNAL_MAKE_TRUSTED);
2946
}
2947
2948
WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949
WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950
WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951
WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952
}
2953
2954
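/* Initialize the MME QMANs: GAUDI_QUEUE_ID_MME_0_X is mapped to the N_W MME
 * (mmMME2_QM_BASE) and GAUDI_QUEUE_ID_MME_1_X to the S_W MME (mmMME0_QM_BASE);
 * qman_id 4 of each MME is the lower CP, which gets no PQ of its own.
 */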
static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955
{
2956
struct gaudi_device *gaudi = hdev->asic_specific;
2957
struct gaudi_internal_qman_info *q;
2958
u64 qman_base_addr;
2959
u32 mme_offset;
2960
int i, internal_q_index;
2961
2962
if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963
return;
2964
2965
/*
2966
* map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967
* and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968
*/
2969
2970
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972
for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973
internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974
q = &gaudi->internal_qmans[internal_q_index];
2975
qman_base_addr = (u64) q->pq_dma_addr;
2976
gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977
qman_base_addr);
2978
if (i == 3)
2979
mme_offset = 0;
2980
}
2981
2982
/* Initializing lower CP for MME QMANs */
2983
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984
gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985
gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987
WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988
WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990
gaudi->hw_cap_initialized |= HW_CAP_MME;
2991
}
2992
2993
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994
int qman_id, u64 qman_base_addr)
2995
{
2996
struct cpu_dyn_regs *dyn_regs =
2997
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000
u32 tpc_qm_err_cfg, irq_handler_offset;
3001
u32 q_off, tpc_id;
3002
3003
mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005
mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007
so_base_en_lo = lower_32_bits(CFG_BASE +
3008
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009
so_base_en_hi = upper_32_bits(CFG_BASE +
3010
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015
so_base_ws_lo = lower_32_bits(CFG_BASE +
3016
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017
so_base_ws_hi = upper_32_bits(CFG_BASE +
3018
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020
q_off = tpc_offset + qman_id * 4;
3021
3022
tpc_id = tpc_offset /
3023
(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025
if (qman_id < 4) {
3026
WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027
lower_32_bits(qman_base_addr));
3028
WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029
upper_32_bits(qman_base_addr));
3030
3031
WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032
WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033
WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035
WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036
QMAN_CPDMA_SIZE_OFFSET);
3037
WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038
QMAN_CPDMA_SRC_OFFSET);
3039
WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040
QMAN_CPDMA_DST_OFFSET);
3041
} else {
3042
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044
le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046
WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047
QMAN_LDMA_SIZE_OFFSET);
3048
WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049
QMAN_LDMA_SRC_OFFSET);
3050
WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051
QMAN_LDMA_DST_OFFSET);
3052
3053
/* Configure RAZWI IRQ */
3054
tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055
if (hdev->stop_on_err)
3056
tpc_qm_err_cfg |=
3057
TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059
WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061
WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062
lower_32_bits(CFG_BASE + irq_handler_offset));
3063
WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064
upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066
WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067
gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068
tpc_id);
3069
3070
WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071
QM_ARB_ERR_MSG_EN_MASK);
3072
3073
/* Set timeout to maximum */
3074
WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3075
3076
WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3077
WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3078
QMAN_INTERNAL_MAKE_TRUSTED);
3079
}
3080
3081
WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3082
WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3083
WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3084
WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3085
3086
/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3087
if (tpc_id == 6) {
3088
WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3089
mtr_base_ws_lo);
3090
WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3091
mtr_base_ws_hi);
3092
WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3093
so_base_ws_lo);
3094
WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3095
so_base_ws_hi);
3096
}
3097
}
3098
3099
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3100
{
3101
struct gaudi_device *gaudi = hdev->asic_specific;
3102
struct gaudi_internal_qman_info *q;
3103
u64 qman_base_addr;
3104
u32 so_base_hi, tpc_offset = 0;
3105
u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3106
mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3107
int i, tpc_id, internal_q_index;
3108
3109
if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3110
return;
3111
3112
so_base_hi = upper_32_bits(CFG_BASE +
3113
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114
3115
for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3116
for (i = 0 ; i < QMAN_STREAMS ; i++) {
3117
internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3118
tpc_id * QMAN_STREAMS + i;
3119
q = &gaudi->internal_qmans[internal_q_index];
3120
qman_base_addr = (u64) q->pq_dma_addr;
3121
gaudi_init_tpc_qman(hdev, tpc_offset, i,
3122
qman_base_addr);
3123
3124
if (i == 3) {
3125
/* Initializing lower CP for TPC QMAN */
3126
gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3127
3128
/* Enable the QMAN and TPC channel */
3129
WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3130
QMAN_TPC_ENABLE);
3131
}
3132
}
3133
3134
WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3135
so_base_hi);
3136
3137
tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3138
3139
gaudi->hw_cap_initialized |=
3140
FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3141
}
3142
}
3143
3144
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3145
int qman_id, u64 qman_base_addr, int nic_id)
3146
{
3147
struct cpu_dyn_regs *dyn_regs =
3148
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3149
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3150
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3151
u32 nic_qm_err_cfg, irq_handler_offset;
3152
u32 q_off;
3153
3154
mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3155
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3156
mtr_base_en_hi = upper_32_bits(CFG_BASE +
3157
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3158
so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3159
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3160
so_base_en_hi = upper_32_bits(CFG_BASE +
3161
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3162
mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3163
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3164
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3165
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3166
so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3167
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3168
so_base_ws_hi = upper_32_bits(CFG_BASE +
3169
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3170
3171
q_off = nic_offset + qman_id * 4;
3172
3173
WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3174
WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3175
3176
WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3177
WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3178
WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3179
3180
WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3181
QMAN_LDMA_SIZE_OFFSET);
3182
WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3183
QMAN_LDMA_SRC_OFFSET);
3184
WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3185
QMAN_LDMA_DST_OFFSET);
3186
3187
WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3188
WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3189
WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3190
WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3191
3192
/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3193
WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3194
WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3195
WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3196
WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3197
3198
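        /*
         * Per-QMAN global configuration (error reporting address, arbitration
         * watchdog, protection) is programmed only once, together with the
         * first stream (qman_id == 0).
         */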
if (qman_id == 0) {
3199
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3200
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3201
le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3202
3203
/* Configure RAZWI IRQ */
3204
nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3205
if (hdev->stop_on_err)
3206
nic_qm_err_cfg |=
3207
NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3208
3209
WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3210
3211
WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3212
lower_32_bits(CFG_BASE + irq_handler_offset));
3213
WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3214
upper_32_bits(CFG_BASE + irq_handler_offset));
3215
3216
WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3217
gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3218
nic_id);
3219
3220
WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3221
QM_ARB_ERR_MSG_EN_MASK);
3222
3223
/* Set timeout to maximum */
3224
WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3225
3226
WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3227
WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3228
QMAN_INTERNAL_MAKE_TRUSTED);
3229
}
3230
}
3231
3232
static void gaudi_init_nic_qmans(struct hl_device *hdev)
3233
{
3234
struct gaudi_device *gaudi = hdev->asic_specific;
3235
struct gaudi_internal_qman_info *q;
3236
u64 qman_base_addr;
3237
u32 nic_offset = 0;
3238
u32 nic_delta_between_qmans =
3239
mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3240
u32 nic_delta_between_nics =
3241
mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242
int i, nic_id, internal_q_index;
3243
3244
if (!hdev->nic_ports_mask)
3245
return;
3246
3247
if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3248
return;
3249
3250
dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3251
3252
for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3253
if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3254
nic_offset += nic_delta_between_qmans;
3255
if (nic_id & 1) {
3256
nic_offset -= (nic_delta_between_qmans * 2);
3257
nic_offset += nic_delta_between_nics;
3258
}
3259
continue;
3260
}
3261
3262
for (i = 0 ; i < QMAN_STREAMS ; i++) {
3263
internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3264
nic_id * QMAN_STREAMS + i;
3265
q = &gaudi->internal_qmans[internal_q_index];
3266
qman_base_addr = (u64) q->pq_dma_addr;
3267
gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3268
qman_base_addr, nic_id);
3269
}
3270
3271
/* Enable the QMAN */
3272
WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3273
3274
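                /*
                 * Each NIC macro holds two QMANs: advance by one QMAN stride,
                 * and after the odd-numbered engine rewind two QMAN strides
                 * and step to the next NIC macro instead.
                 */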
nic_offset += nic_delta_between_qmans;
3275
if (nic_id & 1) {
3276
nic_offset -= (nic_delta_between_qmans * 2);
3277
nic_offset += nic_delta_between_nics;
3278
}
3279
3280
gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3281
}
3282
}
3283
3284
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
                return;

        WREG32(mmDMA0_QM_GLBL_CFG0, 0);
        WREG32(mmDMA1_QM_GLBL_CFG0, 0);
        WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
                return;

        WREG32(mmDMA2_QM_GLBL_CFG0, 0);
        WREG32(mmDMA3_QM_GLBL_CFG0, 0);
        WREG32(mmDMA4_QM_GLBL_CFG0, 0);
        WREG32(mmDMA6_QM_GLBL_CFG0, 0);
        WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
                return;

        WREG32(mmMME2_QM_GLBL_CFG0, 0);
        WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        u32 tpc_offset = 0;
        int tpc_id;

        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
                return;

        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
        }
}
3335
3336
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3337
{
3338
struct gaudi_device *gaudi = hdev->asic_specific;
3339
u32 nic_mask, nic_offset = 0;
3340
u32 nic_delta_between_qmans =
3341
mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3342
u32 nic_delta_between_nics =
3343
mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344
int nic_id;
3345
3346
for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3347
nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3348
3349
if (gaudi->hw_cap_initialized & nic_mask)
3350
WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3351
3352
nic_offset += nic_delta_between_qmans;
3353
if (nic_id & 1) {
3354
nic_offset -= (nic_delta_between_qmans * 2);
3355
nic_offset += nic_delta_between_nics;
3356
}
3357
}
3358
}
3359
3360
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3361
{
3362
struct gaudi_device *gaudi = hdev->asic_specific;
3363
3364
if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3365
return;
3366
3367
/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3368
WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369
WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370
WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371
}
3372
3373
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3374
{
3375
struct gaudi_device *gaudi = hdev->asic_specific;
3376
3377
if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378
return;
3379
3380
/* Stop CPs of HBM DMA QMANs */
3381
3382
WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383
WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384
WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385
WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386
WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387
}
3388
3389
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3390
{
3391
struct gaudi_device *gaudi = hdev->asic_specific;
3392
3393
if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3394
return;
3395
3396
/* Stop CPs of MME QMANs */
3397
WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398
WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399
}
3400
3401
static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3402
{
3403
struct gaudi_device *gaudi = hdev->asic_specific;
3404
3405
if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3406
return;
3407
3408
WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409
WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410
WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411
WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412
WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413
WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415
WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416
}
3417
3418
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419
{
3420
struct gaudi_device *gaudi = hdev->asic_specific;
3421
3422
/* Stop upper CPs of QMANs */
3423
3424
if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425
WREG32(mmNIC0_QM0_GLBL_CFG1,
3426
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429
3430
if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431
WREG32(mmNIC0_QM1_GLBL_CFG1,
3432
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435
3436
if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437
WREG32(mmNIC1_QM0_GLBL_CFG1,
3438
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441
3442
if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443
WREG32(mmNIC1_QM1_GLBL_CFG1,
3444
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447
3448
if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449
WREG32(mmNIC2_QM0_GLBL_CFG1,
3450
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453
3454
if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455
WREG32(mmNIC2_QM1_GLBL_CFG1,
3456
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459
3460
if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461
WREG32(mmNIC3_QM0_GLBL_CFG1,
3462
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465
3466
if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467
WREG32(mmNIC3_QM1_GLBL_CFG1,
3468
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471
3472
if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473
WREG32(mmNIC4_QM0_GLBL_CFG1,
3474
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477
3478
if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479
WREG32(mmNIC4_QM1_GLBL_CFG1,
3480
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483
}
3484
3485
static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486
{
3487
struct gaudi_device *gaudi = hdev->asic_specific;
3488
3489
if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490
return;
3491
3492
WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493
WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494
WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495
}
3496
3497
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498
{
3499
struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501
if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502
return;
3503
3504
WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505
WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506
WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507
WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508
WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509
}
3510
3511
static void gaudi_mme_stall(struct hl_device *hdev)
3512
{
3513
struct gaudi_device *gaudi = hdev->asic_specific;
3514
3515
if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516
return;
3517
3518
/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535
}
3536
3537
static void gaudi_tpc_stall(struct hl_device *hdev)
3538
{
3539
struct gaudi_device *gaudi = hdev->asic_specific;
3540
3541
if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542
return;
3543
3544
WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545
WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546
WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547
WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548
WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549
WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550
WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552
}
3553
3554
static void gaudi_disable_clock_gating(struct hl_device *hdev)
3555
{
3556
u32 qman_offset;
3557
int i;
3558
3559
if (hdev->asic_prop.fw_security_enabled)
3560
return;
3561
3562
for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3563
WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3564
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3565
3566
qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3567
}
3568
3569
WREG32(mmMME0_QM_CGM_CFG, 0);
3570
WREG32(mmMME0_QM_CGM_CFG1, 0);
3571
WREG32(mmMME2_QM_CGM_CFG, 0);
3572
WREG32(mmMME2_QM_CGM_CFG1, 0);
3573
3574
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3575
WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3576
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3577
3578
qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3579
}
3580
}
3581
3582
static void gaudi_enable_timestamp(struct hl_device *hdev)
{
        /* Disable the timestamp counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

        /* Zero the lower/upper parts of the 64-bit counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

        /* Enable the counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void gaudi_disable_timestamp(struct hl_device *hdev)
{
        /* Disable the timestamp counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
3600
3601
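/*
 * Engines are halted in three phases: the QMANs first stop fetching new
 * work, then the engine cores themselves are stalled, and finally the
 * QMANs are disabled, with a wait between phases.
 */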
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
        u32 wait_timeout_ms;

        if (hdev->pldm)
                wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
        else
                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

        if (fw_reset)
                goto skip_engines;

        gaudi_stop_nic_qmans(hdev);
        gaudi_stop_mme_qmans(hdev);
        gaudi_stop_tpc_qmans(hdev);
        gaudi_stop_hbm_dma_qmans(hdev);
        gaudi_stop_pci_dma_qmans(hdev);

        msleep(wait_timeout_ms);

        gaudi_pci_dma_stall(hdev);
        gaudi_hbm_dma_stall(hdev);
        gaudi_tpc_stall(hdev);
        gaudi_mme_stall(hdev);

        msleep(wait_timeout_ms);

        gaudi_disable_nic_qmans(hdev);
        gaudi_disable_mme_qmans(hdev);
        gaudi_disable_tpc_qmans(hdev);
        gaudi_disable_hbm_dma_qmans(hdev);
        gaudi_disable_pci_dma_qmans(hdev);

        gaudi_disable_timestamp(hdev);

skip_engines:
        gaudi_disable_msi(hdev);
}
3639
3640
static int gaudi_mmu_init(struct hl_device *hdev)
3641
{
3642
struct asic_fixed_properties *prop = &hdev->asic_prop;
3643
struct gaudi_device *gaudi = hdev->asic_specific;
3644
u64 hop0_addr;
3645
int rc, i;
3646
3647
if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3648
return 0;
3649
3650
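        /*
         * Program a hop-0 page table base for every possible ASID; the tables
         * are laid out consecutively, hop_table_size bytes apart, starting at
         * mmu_pgt_addr.
         */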
for (i = 0 ; i < prop->max_asid ; i++) {
3651
hop0_addr = prop->mmu_pgt_addr +
3652
(i * prop->dmmu.hop_table_size);
3653
3654
rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3655
if (rc) {
3656
dev_err(hdev->dev,
3657
"failed to set hop0 addr for asid %d\n", i);
3658
return rc;
3659
}
3660
}
3661
3662
/* init MMU cache manage page */
3663
WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3664
WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3665
3666
/* mem cache invalidation */
3667
WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3668
3669
rc = hl_mmu_invalidate_cache(hdev, true, 0);
3670
if (rc)
3671
return rc;
3672
3673
WREG32(mmMMU_UP_MMU_ENABLE, 1);
3674
WREG32(mmMMU_UP_SPI_MASK, 0xF);
3675
3676
WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3677
3678
/*
3679
* The H/W expects the first PI after init to be 1. After wraparound
3680
* we'll write 0.
3681
*/
3682
gaudi->mmu_cache_inv_pi = 1;
3683
3684
gaudi->hw_cap_initialized |= HW_CAP_MMU;
3685
3686
return 0;
3687
}
3688
3689
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
        void __iomem *dst;

        dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

        return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
        void __iomem *dst;

        dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

        return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}
3706
3707
static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3708
{
3709
struct dynamic_fw_load_mgr *dynamic_loader;
3710
struct cpu_dyn_regs *dyn_regs;
3711
3712
dynamic_loader = &hdev->fw_loader.dynamic_loader;
3713
3714
        /*
         * Here we set initial values for a few specific dynamic registers.
         * Until the first descriptor is read from the FW, these values have
         * to be hard-coded; in later stages of the protocol they are
         * refreshed automatically from the FW descriptor, so the data there
         * is always up-to-date.
         */
3721
dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3722
dyn_regs->kmd_msg_to_cpu =
3723
cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3724
dyn_regs->cpu_cmd_status_to_host =
3725
cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3726
3727
dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3728
}
3729
3730
static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3731
{
3732
struct static_fw_load_mgr *static_loader;
3733
3734
static_loader = &hdev->fw_loader.static_loader;
3735
3736
static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737
static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738
static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3739
static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3740
static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3741
static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3742
static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3743
static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3744
static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3745
static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3746
static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3747
static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3748
static_loader->cpu_reset_wait_msec = hdev->pldm ?
3749
GAUDI_PLDM_RESET_WAIT_MSEC :
3750
GAUDI_CPU_RESET_WAIT_MSEC;
3751
}
3752
3753
static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3754
{
3755
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3756
3757
pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3758
pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3759
pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3760
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3761
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3762
pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3763
}
3764
3765
static void gaudi_init_firmware_loader(struct hl_device *hdev)
3766
{
3767
struct asic_fixed_properties *prop = &hdev->asic_prop;
3768
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3769
3770
/* fill common fields */
3771
fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3772
fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3773
fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3774
fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3775
fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3776
fw_loader->skip_bmc = !hdev->bmc_enable;
3777
fw_loader->sram_bar_id = SRAM_BAR_ID;
3778
fw_loader->dram_bar_id = HBM_BAR_ID;
3779
3780
if (prop->dynamic_fw_load)
3781
gaudi_init_dynamic_firmware_loader(hdev);
3782
else
3783
gaudi_init_static_firmware_loader(hdev);
3784
}
3785
3786
static int gaudi_init_cpu(struct hl_device *hdev)
3787
{
3788
struct gaudi_device *gaudi = hdev->asic_specific;
3789
int rc;
3790
3791
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3792
return 0;
3793
3794
if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3795
return 0;
3796
3797
/*
3798
* The device CPU works with 40 bits addresses.
3799
* This register sets the extension to 50 bits.
3800
*/
3801
if (!hdev->asic_prop.fw_security_enabled)
3802
WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3803
3804
rc = hl_fw_init_cpu(hdev);
3805
3806
if (rc)
3807
return rc;
3808
3809
gaudi->hw_cap_initialized |= HW_CAP_CPU;
3810
3811
return 0;
3812
}
3813
3814
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3815
{
3816
struct cpu_dyn_regs *dyn_regs =
3817
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3818
struct asic_fixed_properties *prop = &hdev->asic_prop;
3819
struct gaudi_device *gaudi = hdev->asic_specific;
3820
u32 status, irq_handler_offset;
3821
struct hl_eq *eq;
3822
struct hl_hw_queue *cpu_pq =
3823
&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3824
int err;
3825
3826
if (!hdev->cpu_queues_enable)
3827
return 0;
3828
3829
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3830
return 0;
3831
3832
eq = &hdev->event_queue;
3833
3834
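        /*
         * Hand the device CPU the host addresses of the kernel PQ, the event
         * queue and the CPU-accessible memory region, then signal QUEUE_INIT
         * and wait below for the firmware to report READY_FOR_HOST.
         */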
WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3835
WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3836
3837
WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3838
WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3839
3840
WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3841
lower_32_bits(hdev->cpu_accessible_dma_address));
3842
WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3843
upper_32_bits(hdev->cpu_accessible_dma_address));
3844
3845
WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3846
WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3847
WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3848
3849
/* Used for EQ CI */
3850
WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3851
3852
WREG32(mmCPU_IF_PF_PQ_PI, 0);
3853
3854
WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3855
3856
irq_handler_offset = prop->gic_interrupts_enable ?
3857
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3858
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3859
3860
WREG32(irq_handler_offset,
3861
gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3862
3863
err = hl_poll_timeout(
3864
hdev,
3865
mmCPU_IF_QUEUE_INIT,
3866
status,
3867
(status == PQ_INIT_STATUS_READY_FOR_HOST),
3868
1000,
3869
cpu_timeout);
3870
3871
if (err) {
3872
dev_err(hdev->dev,
3873
"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3874
return -EIO;
3875
}
3876
3877
/* update FW application security bits */
3878
if (prop->fw_cpu_boot_dev_sts0_valid)
3879
prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3880
if (prop->fw_cpu_boot_dev_sts1_valid)
3881
prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3882
3883
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3884
return 0;
3885
}
3886
3887
static void gaudi_pre_hw_init(struct hl_device *hdev)
3888
{
3889
/* Perform read from the device to make sure device is up */
3890
RREG32(mmHW_STATE);
3891
3892
if (!hdev->asic_prop.fw_security_enabled) {
3893
/* Set the access through PCI bars (Linux driver only) as
3894
* secured
3895
*/
3896
WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3897
(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3898
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3899
3900
/* Perform read to flush the waiting writes to ensure
3901
* configuration was set in the device
3902
*/
3903
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3904
}
3905
3906
/*
3907
* Let's mark in the H/W that we have reached this point. We check
3908
* this value in the reset_before_init function to understand whether
3909
* we need to reset the chip before doing H/W init. This register is
3910
* cleared by the H/W upon H/W reset
3911
*/
3912
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3913
}
3914
3915
static int gaudi_hw_init(struct hl_device *hdev)
3916
{
3917
struct gaudi_device *gaudi = hdev->asic_specific;
3918
int rc;
3919
3920
gaudi_pre_hw_init(hdev);
3921
3922
/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3923
* So we set it here and if anyone tries to move it later to
3924
* a different address, there will be an error
3925
*/
3926
if (hdev->asic_prop.iatu_done_by_fw)
3927
gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3928
3929
/*
3930
* Before pushing u-boot/linux to device, need to set the hbm bar to
3931
* base address of dram
3932
*/
3933
if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3934
dev_err(hdev->dev,
3935
"failed to map HBM bar to DRAM base address\n");
3936
return -EIO;
3937
}
3938
3939
rc = gaudi_init_cpu(hdev);
3940
if (rc) {
3941
dev_err(hdev->dev, "failed to initialize CPU\n");
3942
return rc;
3943
}
3944
3945
/* In case the clock gating was enabled in preboot we need to disable
3946
* it here before touching the MME/TPC registers.
3947
*/
3948
gaudi_disable_clock_gating(hdev);
3949
3950
/* SRAM scrambler must be initialized after CPU is running from HBM */
3951
gaudi_init_scrambler_sram(hdev);
3952
3953
/* This is here just in case we are working without CPU */
3954
gaudi_init_scrambler_hbm(hdev);
3955
3956
gaudi_init_golden_registers(hdev);
3957
3958
rc = gaudi_mmu_init(hdev);
3959
if (rc)
3960
return rc;
3961
3962
gaudi_init_security(hdev);
3963
3964
gaudi_init_pci_dma_qmans(hdev);
3965
3966
gaudi_init_hbm_dma_qmans(hdev);
3967
3968
gaudi_init_mme_qmans(hdev);
3969
3970
gaudi_init_tpc_qmans(hdev);
3971
3972
gaudi_init_nic_qmans(hdev);
3973
3974
gaudi_enable_timestamp(hdev);
3975
3976
/* MSI must be enabled before CPU queues and NIC are initialized */
3977
rc = gaudi_enable_msi(hdev);
3978
if (rc)
3979
goto disable_queues;
3980
3981
/* must be called after MSI was enabled */
3982
rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3983
if (rc) {
3984
dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3985
rc);
3986
goto disable_msi;
3987
}
3988
3989
/* Perform read from the device to flush all configuration */
3990
RREG32(mmHW_STATE);
3991
3992
return 0;
3993
3994
disable_msi:
3995
gaudi_disable_msi(hdev);
3996
disable_queues:
3997
gaudi_disable_mme_qmans(hdev);
3998
gaudi_disable_pci_dma_qmans(hdev);
3999
4000
return rc;
4001
}
4002
4003
static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4004
{
4005
struct cpu_dyn_regs *dyn_regs =
4006
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007
u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4008
struct gaudi_device *gaudi = hdev->asic_specific;
4009
bool driver_performs_reset;
4010
4011
if (!hard_reset) {
4012
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4013
return 0;
4014
}
4015
4016
if (hdev->pldm) {
4017
reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4018
cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4019
} else {
4020
reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4021
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4022
}
4023
4024
if (fw_reset) {
4025
dev_dbg(hdev->dev,
4026
"Firmware performs HARD reset, going to wait %dms\n",
4027
reset_timeout_ms);
4028
4029
goto skip_reset;
4030
}
4031
4032
driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4033
!hdev->asic_prop.hard_reset_done_by_fw);
4034
4035
/* Set device to handle FLR by H/W as we will put the device CPU to
4036
* halt mode
4037
*/
4038
if (driver_performs_reset)
4039
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4040
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4041
4042
/* If linux is loaded in the device CPU we need to communicate with it
4043
* via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044
* registers in case of old F/Ws
4045
*/
4046
if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4047
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4048
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4049
le32_to_cpu(dyn_regs->gic_host_halt_irq);
4050
4051
WREG32(irq_handler_offset,
4052
gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4053
4054
                /* This is a hail-mary attempt to revive the card in the small
                 * chance that the f/w has experienced a watchdog event, which
                 * caused it to return to preboot. In that case, triggering a
                 * reset through the GIC won't help; we need to trigger the
                 * reset as if Linux wasn't loaded.
                 *
                 * We do it only if the reset cause was HB, because that is
                 * the indication of such an event.
                 *
                 * If the watchdog hasn't expired but we still got HB, this
                 * won't do any damage.
                 */
4065
if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4066
if (hdev->asic_prop.hard_reset_done_by_fw)
4067
hl_fw_ask_hard_reset_without_linux(hdev);
4068
else
4069
hl_fw_ask_halt_machine_without_linux(hdev);
4070
}
4071
} else {
4072
if (hdev->asic_prop.hard_reset_done_by_fw)
4073
hl_fw_ask_hard_reset_without_linux(hdev);
4074
else
4075
hl_fw_ask_halt_machine_without_linux(hdev);
4076
}
4077
4078
if (driver_performs_reset) {
4079
4080
/* Configure the reset registers. Must be done as early as
4081
* possible in case we fail during H/W initialization
4082
*/
4083
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4084
(CFG_RST_H_DMA_MASK |
4085
CFG_RST_H_MME_MASK |
4086
CFG_RST_H_SM_MASK |
4087
CFG_RST_H_TPC_7_MASK));
4088
4089
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4090
4091
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4092
(CFG_RST_H_HBM_MASK |
4093
CFG_RST_H_TPC_7_MASK |
4094
CFG_RST_H_NIC_MASK |
4095
CFG_RST_H_SM_MASK |
4096
CFG_RST_H_DMA_MASK |
4097
CFG_RST_H_MME_MASK |
4098
CFG_RST_H_CPU_MASK |
4099
CFG_RST_H_MMU_MASK));
4100
4101
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4102
(CFG_RST_L_IF_MASK |
4103
CFG_RST_L_PSOC_MASK |
4104
CFG_RST_L_TPC_MASK));
4105
4106
msleep(cpu_timeout_ms);
4107
4108
/* Tell ASIC not to re-initialize PCIe */
4109
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4110
4111
/* Restart BTL/BLR upon hard-reset */
4112
WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4113
4114
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4115
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4116
4117
dev_dbg(hdev->dev,
4118
"Issued HARD reset command, going to wait %dms\n",
4119
reset_timeout_ms);
4120
} else {
4121
dev_dbg(hdev->dev,
4122
"Firmware performs HARD reset, going to wait %dms\n",
4123
reset_timeout_ms);
4124
}
4125
4126
skip_reset:
4127
/*
4128
* After hard reset, we can't poll the BTM_FSM register because the PSOC
4129
* itself is in reset. Need to wait until the reset is deasserted
4130
*/
4131
msleep(reset_timeout_ms);
4132
4133
status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4134
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4135
dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4136
return -ETIMEDOUT;
4137
}
4138
4139
if (gaudi) {
4140
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4141
HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4142
HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4143
HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4144
HW_CAP_HBM_SCRAMBLER);
4145
4146
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4147
4148
hdev->device_cpu_is_halted = false;
4149
}
4150
return 0;
4151
}
4152
4153
static int gaudi_suspend(struct hl_device *hdev)
{
        return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}

static int gaudi_resume(struct hl_device *hdev)
{
        return gaudi_init_iatu(hdev);
}
4162
4163
static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4164
void *cpu_addr, dma_addr_t dma_addr, size_t size)
4165
{
4166
int rc;
4167
4168
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4169
VM_DONTCOPY | VM_NORESERVE);
4170
4171
#ifdef _HAS_DMA_MMAP_COHERENT
4172
/*
4173
* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
4174
* so vm_insert_page() can handle it safely. Without this, the kernel
4175
* may BUG_ON due to VM_PFNMAP.
4176
*/
4177
if (is_vmalloc_addr(cpu_addr))
4178
vm_flags_set(vma, VM_MIXEDMAP);
4179
4180
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4181
(dma_addr - HOST_PHYS_BASE), size);
4182
if (rc)
4183
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4184
#else
4185
4186
rc = remap_pfn_range(vma, vma->vm_start,
4187
virt_to_phys(cpu_addr) >> PAGE_SHIFT,
4188
size, vma->vm_page_prot);
4189
if (rc)
4190
dev_err(hdev->dev, "remap_pfn_range error %d", rc);
4191
4192
#endif
4193
4194
4195
return rc;
4196
}
4197
4198
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4199
{
4200
struct cpu_dyn_regs *dyn_regs =
4201
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4202
u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4203
struct gaudi_device *gaudi = hdev->asic_specific;
4204
bool invalid_queue = false;
4205
int dma_id;
4206
4207
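        /*
         * Resolve the doorbell register for this queue: every QMAN exposes
         * one PQ_PI register per stream, 4 bytes apart, so the stream index
         * within the QMAN selects the exact register.
         */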
switch (hw_queue_id) {
4208
case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4209
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4210
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4211
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4212
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4213
break;
4214
4215
case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4216
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4217
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4218
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4219
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4220
break;
4221
4222
case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4223
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4224
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4225
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4226
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4227
break;
4228
4229
case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4230
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4231
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4232
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4233
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4234
break;
4235
4236
case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4237
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4238
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4239
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4240
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4241
break;
4242
4243
case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4244
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4245
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4247
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248
break;
4249
4250
case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4251
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4252
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4253
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4254
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4255
break;
4256
4257
case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4258
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4259
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4260
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4261
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4262
break;
4263
4264
case GAUDI_QUEUE_ID_CPU_PQ:
4265
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4266
db_reg_offset = mmCPU_IF_PF_PQ_PI;
4267
else
4268
invalid_queue = true;
4269
break;
4270
4271
case GAUDI_QUEUE_ID_MME_0_0:
4272
db_reg_offset = mmMME2_QM_PQ_PI_0;
4273
break;
4274
4275
case GAUDI_QUEUE_ID_MME_0_1:
4276
db_reg_offset = mmMME2_QM_PQ_PI_1;
4277
break;
4278
4279
case GAUDI_QUEUE_ID_MME_0_2:
4280
db_reg_offset = mmMME2_QM_PQ_PI_2;
4281
break;
4282
4283
case GAUDI_QUEUE_ID_MME_0_3:
4284
db_reg_offset = mmMME2_QM_PQ_PI_3;
4285
break;
4286
4287
case GAUDI_QUEUE_ID_MME_1_0:
4288
db_reg_offset = mmMME0_QM_PQ_PI_0;
4289
break;
4290
4291
case GAUDI_QUEUE_ID_MME_1_1:
4292
db_reg_offset = mmMME0_QM_PQ_PI_1;
4293
break;
4294
4295
case GAUDI_QUEUE_ID_MME_1_2:
4296
db_reg_offset = mmMME0_QM_PQ_PI_2;
4297
break;
4298
4299
case GAUDI_QUEUE_ID_MME_1_3:
4300
db_reg_offset = mmMME0_QM_PQ_PI_3;
4301
break;
4302
4303
case GAUDI_QUEUE_ID_TPC_0_0:
4304
db_reg_offset = mmTPC0_QM_PQ_PI_0;
4305
break;
4306
4307
case GAUDI_QUEUE_ID_TPC_0_1:
4308
db_reg_offset = mmTPC0_QM_PQ_PI_1;
4309
break;
4310
4311
case GAUDI_QUEUE_ID_TPC_0_2:
4312
db_reg_offset = mmTPC0_QM_PQ_PI_2;
4313
break;
4314
4315
case GAUDI_QUEUE_ID_TPC_0_3:
4316
db_reg_offset = mmTPC0_QM_PQ_PI_3;
4317
break;
4318
4319
case GAUDI_QUEUE_ID_TPC_1_0:
4320
db_reg_offset = mmTPC1_QM_PQ_PI_0;
4321
break;
4322
4323
case GAUDI_QUEUE_ID_TPC_1_1:
4324
db_reg_offset = mmTPC1_QM_PQ_PI_1;
4325
break;
4326
4327
case GAUDI_QUEUE_ID_TPC_1_2:
4328
db_reg_offset = mmTPC1_QM_PQ_PI_2;
4329
break;
4330
4331
case GAUDI_QUEUE_ID_TPC_1_3:
4332
db_reg_offset = mmTPC1_QM_PQ_PI_3;
4333
break;
4334
4335
case GAUDI_QUEUE_ID_TPC_2_0:
4336
db_reg_offset = mmTPC2_QM_PQ_PI_0;
4337
break;
4338
4339
case GAUDI_QUEUE_ID_TPC_2_1:
4340
db_reg_offset = mmTPC2_QM_PQ_PI_1;
4341
break;
4342
4343
case GAUDI_QUEUE_ID_TPC_2_2:
4344
db_reg_offset = mmTPC2_QM_PQ_PI_2;
4345
break;
4346
4347
case GAUDI_QUEUE_ID_TPC_2_3:
4348
db_reg_offset = mmTPC2_QM_PQ_PI_3;
4349
break;
4350
4351
case GAUDI_QUEUE_ID_TPC_3_0:
4352
db_reg_offset = mmTPC3_QM_PQ_PI_0;
4353
break;
4354
4355
case GAUDI_QUEUE_ID_TPC_3_1:
4356
db_reg_offset = mmTPC3_QM_PQ_PI_1;
4357
break;
4358
4359
case GAUDI_QUEUE_ID_TPC_3_2:
4360
db_reg_offset = mmTPC3_QM_PQ_PI_2;
4361
break;
4362
4363
case GAUDI_QUEUE_ID_TPC_3_3:
4364
db_reg_offset = mmTPC3_QM_PQ_PI_3;
4365
break;
4366
4367
case GAUDI_QUEUE_ID_TPC_4_0:
4368
db_reg_offset = mmTPC4_QM_PQ_PI_0;
4369
break;
4370
4371
case GAUDI_QUEUE_ID_TPC_4_1:
4372
db_reg_offset = mmTPC4_QM_PQ_PI_1;
4373
break;
4374
4375
case GAUDI_QUEUE_ID_TPC_4_2:
4376
db_reg_offset = mmTPC4_QM_PQ_PI_2;
4377
break;
4378
4379
case GAUDI_QUEUE_ID_TPC_4_3:
4380
db_reg_offset = mmTPC4_QM_PQ_PI_3;
4381
break;
4382
4383
case GAUDI_QUEUE_ID_TPC_5_0:
4384
db_reg_offset = mmTPC5_QM_PQ_PI_0;
4385
break;
4386
4387
case GAUDI_QUEUE_ID_TPC_5_1:
4388
db_reg_offset = mmTPC5_QM_PQ_PI_1;
4389
break;
4390
4391
case GAUDI_QUEUE_ID_TPC_5_2:
4392
db_reg_offset = mmTPC5_QM_PQ_PI_2;
4393
break;
4394
4395
case GAUDI_QUEUE_ID_TPC_5_3:
4396
db_reg_offset = mmTPC5_QM_PQ_PI_3;
4397
break;
4398
4399
case GAUDI_QUEUE_ID_TPC_6_0:
4400
db_reg_offset = mmTPC6_QM_PQ_PI_0;
4401
break;
4402
4403
case GAUDI_QUEUE_ID_TPC_6_1:
4404
db_reg_offset = mmTPC6_QM_PQ_PI_1;
4405
break;
4406
4407
case GAUDI_QUEUE_ID_TPC_6_2:
4408
db_reg_offset = mmTPC6_QM_PQ_PI_2;
4409
break;
4410
4411
case GAUDI_QUEUE_ID_TPC_6_3:
4412
db_reg_offset = mmTPC6_QM_PQ_PI_3;
4413
break;
4414
4415
case GAUDI_QUEUE_ID_TPC_7_0:
4416
db_reg_offset = mmTPC7_QM_PQ_PI_0;
4417
break;
4418
4419
case GAUDI_QUEUE_ID_TPC_7_1:
4420
db_reg_offset = mmTPC7_QM_PQ_PI_1;
4421
break;
4422
4423
case GAUDI_QUEUE_ID_TPC_7_2:
4424
db_reg_offset = mmTPC7_QM_PQ_PI_2;
4425
break;
4426
4427
case GAUDI_QUEUE_ID_TPC_7_3:
4428
db_reg_offset = mmTPC7_QM_PQ_PI_3;
4429
break;
4430
4431
case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4432
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4433
invalid_queue = true;
4434
4435
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4436
db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4437
break;
4438
4439
case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4440
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4441
invalid_queue = true;
4442
4443
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4444
db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4445
break;
4446
4447
case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4448
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4449
invalid_queue = true;
4450
4451
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4452
db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4453
break;
4454
4455
case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4456
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4457
invalid_queue = true;
4458
4459
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4460
db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4461
break;
4462
4463
case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4464
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4465
invalid_queue = true;
4466
4467
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4468
db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4469
break;
4470
4471
case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4472
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4473
invalid_queue = true;
4474
4475
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4476
db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4477
break;
4478
4479
case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4480
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4481
invalid_queue = true;
4482
4483
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4484
db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4485
break;
4486
4487
case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4488
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4489
invalid_queue = true;
4490
4491
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4492
db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4493
break;
4494
4495
case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4496
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4497
invalid_queue = true;
4498
4499
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4500
db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4501
break;
4502
4503
case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4504
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4505
invalid_queue = true;
4506
4507
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4508
db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4509
break;
4510
4511
default:
4512
invalid_queue = true;
4513
}
4514
4515
if (invalid_queue) {
4516
/* Should never get here */
4517
dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4518
hw_queue_id);
4519
return;
4520
}
4521
4522
db_value = pi;
4523
4524
/* ring the doorbell */
4525
WREG32(db_reg_offset, db_value);
4526
4527
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4528
/* make sure device CPU will read latest data from host */
4529
mb();
4530
4531
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4532
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4533
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4534
4535
WREG32(irq_handler_offset,
4536
gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4537
}
4538
}
4539
4540
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
                                struct hl_bd *bd)
{
        __le64 *pbd = (__le64 *) bd;

        /* The QMAN PQs reside in host memory, so a simple copy suffices */
        pqe[0] = pbd[0];
        pqe[1] = pbd[1];
}
4549
4550
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
                        dma_addr_t *dma_handle, gfp_t flags)
{
        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
                                                dma_handle, flags);

        /* Shift to the device's base physical address of host memory */
        if (kernel_addr)
                *dma_handle += HOST_PHYS_BASE;

        return kernel_addr;
}

static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
                void *cpu_addr, dma_addr_t dma_handle)
{
        /* Cancel the device's base physical address of host memory */
        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}
4571
4572
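/*
 * Scrub DRAM by issuing memset transfers on all DMA channels in parallel:
 * each channel fills up to a 2GB chunk with 'val' (COMMIT with MEM_SET),
 * and all channels are polled for idleness before the next batch starts.
 */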
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4573
{
4574
struct asic_fixed_properties *prop = &hdev->asic_prop;
4575
u64 cur_addr = prop->dram_user_base_address;
4576
u32 chunk_size, busy;
4577
int rc, dma_id;
4578
4579
while (cur_addr < prop->dram_end_address) {
4580
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4581
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4582
4583
chunk_size =
4584
min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4585
4586
dev_dbg(hdev->dev,
4587
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4588
cur_addr, cur_addr + chunk_size);
4589
4590
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4591
lower_32_bits(val));
4592
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4593
upper_32_bits(val));
4594
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4595
lower_32_bits(cur_addr));
4596
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4597
upper_32_bits(cur_addr));
4598
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4599
chunk_size);
4600
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4601
((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4602
(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4603
4604
cur_addr += chunk_size;
4605
4606
if (cur_addr == prop->dram_end_address)
4607
break;
4608
}
4609
4610
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4611
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4612
4613
rc = hl_poll_timeout(
4614
hdev,
4615
mmDMA0_CORE_STS0 + dma_offset,
4616
busy,
4617
((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4618
1000,
4619
HBM_SCRUBBING_TIMEOUT_US);
4620
4621
if (rc) {
4622
dev_err(hdev->dev,
4623
"DMA Timeout during HBM scrubbing of DMA #%d\n",
4624
dma_id);
4625
return -EIO;
4626
}
4627
}
4628
}
4629
4630
return 0;
4631
}
4632
4633
static int gaudi_scrub_device_mem(struct hl_device *hdev)
4634
{
4635
struct asic_fixed_properties *prop = &hdev->asic_prop;
4636
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4637
u64 addr, size, val = hdev->memory_scrub_val;
4638
ktime_t timeout;
4639
int rc = 0;
4640
4641
if (!hdev->memory_scrub)
4642
return 0;
4643
4644
timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4645
while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4646
if (ktime_compare(ktime_get(), timeout) > 0) {
4647
dev_err(hdev->dev, "waiting for idle timeout\n");
4648
return -ETIMEDOUT;
4649
}
4650
usleep_range((1000 >> 2) + 1, 1000);
4651
}
4652
4653
/* Scrub SRAM */
4654
addr = prop->sram_user_base_address;
4655
size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4656
4657
dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4658
addr, addr + size, val);
4659
rc = gaudi_memset_device_memory(hdev, addr, size, val);
4660
if (rc) {
4661
dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4662
return rc;
4663
}
4664
4665
/* Scrub HBM using all DMA channels in parallel */
4666
rc = gaudi_scrub_device_dram(hdev, val);
4667
if (rc) {
4668
dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4669
return rc;
4670
}
4671
4672
return 0;
4673
}
4674
4675
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
                                u32 queue_id, dma_addr_t *dma_handle,
                                u16 *queue_len)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct gaudi_internal_qman_info *q;

        if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
                        gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
                dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
                return NULL;
        }

        q = &gaudi->internal_qmans[queue_id];
        *dma_handle = q->pq_dma_addr;
        *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;

        return q->pq_kernel_addr;
}
4694
4695
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
                                u16 len, u32 timeout, u64 *result)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
                if (result)
                        *result = 0;
                return 0;
        }

        if (!timeout)
                timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

        return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
                                                timeout, result);
}
4712
4713
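/*
 * Sanity-test an external H/W queue: submit a single MSG_PROT packet that
 * writes a known fence value to a host buffer, then poll that buffer until
 * the value appears or the timeout expires.
 */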
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4714
{
4715
struct packet_msg_prot *fence_pkt;
4716
dma_addr_t pkt_dma_addr;
4717
u32 fence_val, tmp, timeout_usec;
4718
dma_addr_t fence_dma_addr;
4719
u32 *fence_ptr;
4720
int rc;
4721
4722
if (hdev->pldm)
4723
timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4724
else
4725
timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4726
4727
fence_val = GAUDI_QMAN0_FENCE_VAL;
4728
4729
fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4730
if (!fence_ptr) {
4731
dev_err(hdev->dev,
4732
"Failed to allocate memory for H/W queue %d testing\n",
4733
hw_queue_id);
4734
return -ENOMEM;
4735
}
4736
4737
*fence_ptr = 0;
4738
4739
fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4740
&pkt_dma_addr);
4741
if (!fence_pkt) {
4742
dev_err(hdev->dev,
4743
"Failed to allocate packet for H/W queue %d testing\n",
4744
hw_queue_id);
4745
rc = -ENOMEM;
4746
goto free_fence_ptr;
4747
}
4748
4749
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4750
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4751
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4752
4753
fence_pkt->ctl = cpu_to_le32(tmp);
4754
fence_pkt->value = cpu_to_le32(fence_val);
4755
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4756
4757
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4758
sizeof(struct packet_msg_prot),
4759
pkt_dma_addr);
4760
if (rc) {
4761
dev_err(hdev->dev,
4762
"Failed to send fence packet to H/W queue %d\n",
4763
hw_queue_id);
4764
goto free_pkt;
4765
}
4766
4767
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4768
1000, timeout_usec, true);
4769
4770
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4771
4772
if (rc == -ETIMEDOUT) {
4773
dev_err(hdev->dev,
4774
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4775
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4776
rc = -EIO;
4777
}
4778
4779
free_pkt:
4780
hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4781
free_fence_ptr:
4782
hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4783
return rc;
4784
}
4785
4786
static int gaudi_test_cpu_queue(struct hl_device *hdev)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        /*
         * check capability here as send_cpu_message() won't update the result
         * value if no capability
         */
        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
                return 0;

        return hl_fw_test_cpu_queue(hdev);
}

static int gaudi_test_queues(struct hl_device *hdev)
{
        int i, rc, ret_val = 0;

        for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
                if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
                        rc = gaudi_test_queue(hdev, i);
                        if (rc)
                                ret_val = -EINVAL;
                }
        }

        rc = gaudi_test_cpu_queue(hdev);
        if (rc)
                ret_val = -EINVAL;

        return ret_val;
}
4818
4819
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
                        gfp_t mem_flags, dma_addr_t *dma_handle)
{
        void *kernel_addr;

        if (size > GAUDI_DMA_POOL_BLK_SIZE)
                return NULL;

        kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

        /* Shift to the device's base physical address of host memory */
        if (kernel_addr)
                *dma_handle += HOST_PHYS_BASE;

        return kernel_addr;
}

static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
                        dma_addr_t dma_addr)
{
        /* Cancel the device's base physical address of host memory */
        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
                        size_t size, dma_addr_t *dma_handle)
{
        return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}

static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
                        size_t size, void *vaddr)
{
        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4856
4857
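/*
 * Compute how much space the patched CB needs for an SG table: physically
 * contiguous entries are merged as long as the combined length stays within
 * DMA_MAX_TRANSFER_SIZE, and each resulting chunk costs one packet_lin_dma.
 */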
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4858
{
4859
struct scatterlist *sg, *sg_next_iter;
4860
u32 count, dma_desc_cnt;
4861
u64 len, len_next;
4862
dma_addr_t addr, addr_next;
4863
4864
dma_desc_cnt = 0;
4865
4866
for_each_sgtable_dma_sg(sgt, sg, count) {
4867
len = sg_dma_len(sg);
4868
addr = sg_dma_address(sg);
4869
4870
if (len == 0)
4871
break;
4872
4873
while ((count + 1) < sgt->nents) {
4874
sg_next_iter = sg_next(sg);
4875
len_next = sg_dma_len(sg_next_iter);
4876
addr_next = sg_dma_address(sg_next_iter);
4877
4878
if (len_next == 0)
4879
break;
4880
4881
if ((addr + len == addr_next) &&
4882
(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4883
len += len_next;
4884
count++;
4885
sg = sg_next_iter;
4886
} else {
4887
break;
4888
}
4889
}
4890
4891
dma_desc_cnt++;
4892
}
4893
4894
return dma_desc_cnt * sizeof(struct packet_lin_dma);
4895
}
4896
4897
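/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map it, and grow the patched CB size by the
 * descriptors needed to cover the resulting SG table.
 */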
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4898
struct hl_cs_parser *parser,
4899
struct packet_lin_dma *user_dma_pkt,
4900
u64 addr, enum dma_data_direction dir)
4901
{
4902
struct hl_userptr *userptr;
4903
int rc;
4904
4905
if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4906
parser->job_userptr_list, &userptr))
4907
goto already_pinned;
4908
4909
userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4910
if (!userptr)
4911
return -ENOMEM;
4912
4913
rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4914
userptr);
4915
if (rc)
4916
goto free_userptr;
4917
4918
list_add_tail(&userptr->job_node, parser->job_userptr_list);
4919
4920
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4921
if (rc) {
4922
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4923
goto unpin_memory;
4924
}
4925
4926
userptr->dma_mapped = true;
4927
userptr->dir = dir;
4928
4929
already_pinned:
4930
parser->patched_cb_size +=
4931
gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4932
4933
return 0;
4934
4935
unpin_memory:
4936
list_del(&userptr->job_node);
4937
hl_unpin_host_memory(hdev, userptr);
4938
free_userptr:
4939
kfree(userptr);
4940
return rc;
4941
}
4942
4943
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4944
struct hl_cs_parser *parser,
4945
struct packet_lin_dma *user_dma_pkt,
4946
bool src_in_host)
4947
{
4948
enum dma_data_direction dir;
4949
bool skip_host_mem_pin = false, user_memset;
4950
u64 addr;
4951
int rc = 0;
4952
4953
user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4954
GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4955
GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4956
4957
if (src_in_host) {
4958
if (user_memset)
4959
skip_host_mem_pin = true;
4960
4961
dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4962
dir = DMA_TO_DEVICE;
4963
addr = le64_to_cpu(user_dma_pkt->src_addr);
4964
} else {
4965
dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4966
dir = DMA_FROM_DEVICE;
4967
addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968
GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969
GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970
}
4971
4972
if (skip_host_mem_pin)
4973
parser->patched_cb_size += sizeof(*user_dma_pkt);
4974
else
4975
rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4976
addr, dir);
4977
4978
return rc;
4979
}
4980
4981
static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4982
struct hl_cs_parser *parser,
4983
struct packet_lin_dma *user_dma_pkt)
4984
{
4985
bool src_in_host = false;
4986
u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4987
GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4988
GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4989
4990
dev_dbg(hdev->dev, "DMA packet details:\n");
4991
dev_dbg(hdev->dev, "source == 0x%llx\n",
4992
le64_to_cpu(user_dma_pkt->src_addr));
4993
dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4994
dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4995
4996
/*
4997
* Special handling for DMA with size 0. Bypass all validations
4998
* because no transactions will be done except for WR_COMP, which
4999
* is not a security issue
5000
*/
5001
if (!le32_to_cpu(user_dma_pkt->tsize)) {
5002
parser->patched_cb_size += sizeof(*user_dma_pkt);
5003
return 0;
5004
}
5005
5006
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5007
src_in_host = true;
5008
5009
return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5010
src_in_host);
5011
}
5012
5013
static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5014
struct hl_cs_parser *parser,
5015
struct packet_load_and_exe *user_pkt)
5016
{
5017
u32 cfg;
5018
5019
cfg = le32_to_cpu(user_pkt->cfg);
5020
5021
if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5022
dev_err(hdev->dev,
5023
"User not allowed to use Load and Execute\n");
5024
return -EPERM;
5025
}
5026
5027
parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5028
5029
return 0;
5030
}
5031
5032
static int gaudi_validate_cb(struct hl_device *hdev,
5033
struct hl_cs_parser *parser, bool is_mmu)
5034
{
5035
u32 cb_parsed_length = 0;
5036
int rc = 0;
5037
5038
parser->patched_cb_size = 0;
5039
5040
/* user_cb_size is greater than 0, so the loop always executes at least once */
5041
while (cb_parsed_length < parser->user_cb_size) {
5042
enum packet_id pkt_id;
5043
u16 pkt_size;
5044
struct gaudi_packet *user_pkt;
5045
5046
user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5047
5048
pkt_id = (enum packet_id) (
5049
(le64_to_cpu(user_pkt->header) &
5050
PACKET_HEADER_PACKET_ID_MASK) >>
5051
PACKET_HEADER_PACKET_ID_SHIFT);
5052
5053
if (!validate_packet_id(pkt_id)) {
5054
dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5055
rc = -EINVAL;
5056
break;
5057
}
5058
5059
pkt_size = gaudi_packet_sizes[pkt_id];
5060
cb_parsed_length += pkt_size;
5061
if (cb_parsed_length > parser->user_cb_size) {
5062
dev_err(hdev->dev,
5063
"packet 0x%x is out of CB boundary\n", pkt_id);
5064
rc = -EINVAL;
5065
break;
5066
}
5067
5068
switch (pkt_id) {
5069
case PACKET_MSG_PROT:
5070
dev_err(hdev->dev,
5071
"User not allowed to use MSG_PROT\n");
5072
rc = -EPERM;
5073
break;
5074
5075
case PACKET_CP_DMA:
5076
dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5077
rc = -EPERM;
5078
break;
5079
5080
case PACKET_STOP:
5081
dev_err(hdev->dev, "User not allowed to use STOP\n");
5082
rc = -EPERM;
5083
break;
5084
5085
case PACKET_WREG_BULK:
5086
dev_err(hdev->dev,
5087
"User not allowed to use WREG_BULK\n");
5088
rc = -EPERM;
5089
break;
5090
5091
case PACKET_LOAD_AND_EXE:
5092
rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5093
(struct packet_load_and_exe *) user_pkt);
5094
break;
5095
5096
case PACKET_LIN_DMA:
5097
parser->contains_dma_pkt = true;
5098
if (is_mmu)
5099
parser->patched_cb_size += pkt_size;
5100
else
5101
rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5102
(struct packet_lin_dma *) user_pkt);
5103
break;
5104
5105
case PACKET_WREG_32:
5106
case PACKET_MSG_LONG:
5107
case PACKET_MSG_SHORT:
5108
case PACKET_REPEAT:
5109
case PACKET_FENCE:
5110
case PACKET_NOP:
5111
case PACKET_ARB_POINT:
5112
parser->patched_cb_size += pkt_size;
5113
break;
5114
5115
default:
5116
dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5117
pkt_id);
5118
rc = -EINVAL;
5119
break;
5120
}
5121
5122
if (rc)
5123
break;
5124
}
5125
5126
/*
5127
* The new CB should have space at the end for two MSG_PROT packets, plus optional NOP padding:
5128
* 1. Optional NOP padding for cacheline alignment
5129
* 2. A packet that will act as a completion packet
5130
* 3. A packet that will generate an MSI interrupt
5131
*/
5132
if (parser->completion)
5133
parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5134
parser->patched_cb_size);
5135
5136
return rc;
5137
}
5138
5139
static int gaudi_patch_dma_packet(struct hl_device *hdev,
5140
struct hl_cs_parser *parser,
5141
struct packet_lin_dma *user_dma_pkt,
5142
struct packet_lin_dma *new_dma_pkt,
5143
u32 *new_dma_pkt_size)
5144
{
5145
struct hl_userptr *userptr;
5146
struct scatterlist *sg, *sg_next_iter;
5147
u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5148
u64 len, len_next;
5149
dma_addr_t dma_addr, dma_addr_next;
5150
u64 device_memory_addr, addr;
5151
enum dma_data_direction dir;
5152
struct sg_table *sgt;
5153
bool src_in_host = false;
5154
bool skip_host_mem_pin = false;
5155
bool user_memset;
5156
5157
ctl = le32_to_cpu(user_dma_pkt->ctl);
5158
5159
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5160
src_in_host = true;
5161
5162
user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5163
GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5164
5165
if (src_in_host) {
5166
addr = le64_to_cpu(user_dma_pkt->src_addr);
5167
device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5168
dir = DMA_TO_DEVICE;
5169
if (user_memset)
5170
skip_host_mem_pin = true;
5171
} else {
5172
addr = le64_to_cpu(user_dma_pkt->dst_addr);
5173
device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5174
dir = DMA_FROM_DEVICE;
5175
}
5176
5177
if ((!skip_host_mem_pin) &&
5178
(!hl_userptr_is_pinned(hdev, addr,
5179
le32_to_cpu(user_dma_pkt->tsize),
5180
parser->job_userptr_list, &userptr))) {
5181
dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5182
addr, le32_to_cpu(user_dma_pkt->tsize));
5183
return -EFAULT;
5184
}
5185
5186
if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5187
memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5188
*new_dma_pkt_size = sizeof(*user_dma_pkt);
5189
return 0;
5190
}
5191
5192
user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5193
5194
sgt = userptr->sgt;
5195
dma_desc_cnt = 0;
5196
5197
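/*
* Walk the DMA-mapped scatterlist and emit one LIN_DMA packet per run of
* entries, merging physically contiguous neighbours as long as the combined
* length does not exceed DMA_MAX_TRANSFER_SIZE.
*/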
for_each_sgtable_dma_sg(sgt, sg, count) {
5198
len = sg_dma_len(sg);
5199
dma_addr = sg_dma_address(sg);
5200
5201
if (len == 0)
5202
break;
5203
5204
while ((count + 1) < sgt->nents) {
5205
sg_next_iter = sg_next(sg);
5206
len_next = sg_dma_len(sg_next_iter);
5207
dma_addr_next = sg_dma_address(sg_next_iter);
5208
5209
if (len_next == 0)
5210
break;
5211
5212
if ((dma_addr + len == dma_addr_next) &&
5213
(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5214
len += len_next;
5215
count++;
5216
sg = sg_next_iter;
5217
} else {
5218
break;
5219
}
5220
}
5221
5222
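/*
* Only the first generated packet keeps the user's engine-barrier setting;
* write-completion is cleared here and restored on the last packet below.
*/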
ctl = le32_to_cpu(user_dma_pkt->ctl);
5223
if (likely(dma_desc_cnt))
5224
ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5225
ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5226
new_dma_pkt->ctl = cpu_to_le32(ctl);
5227
new_dma_pkt->tsize = cpu_to_le32(len);
5228
5229
if (dir == DMA_TO_DEVICE) {
5230
new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5231
new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5232
} else {
5233
new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5234
new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5235
}
5236
5237
if (!user_memset)
5238
device_memory_addr += len;
5239
dma_desc_cnt++;
5240
new_dma_pkt++;
5241
}
5242
5243
if (!dma_desc_cnt) {
5244
dev_err(hdev->dev,
5245
"Error of 0 SG entries when patching DMA packet\n");
5246
return -EFAULT;
5247
}
5248
5249
/* Fix the last dma packet - wrcomp must be as user set it */
5250
new_dma_pkt--;
5251
new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5252
5253
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5254
5255
return 0;
5256
}
5257
5258
static int gaudi_patch_cb(struct hl_device *hdev,
5259
struct hl_cs_parser *parser)
5260
{
5261
u32 cb_parsed_length = 0;
5262
u32 cb_patched_cur_length = 0;
5263
int rc = 0;
5264
5265
/* user_cb_size is greater than 0, so the loop always executes at least once */
5266
while (cb_parsed_length < parser->user_cb_size) {
5267
enum packet_id pkt_id;
5268
u16 pkt_size;
5269
u32 new_pkt_size = 0;
5270
struct gaudi_packet *user_pkt, *kernel_pkt;
5271
5272
user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5273
kernel_pkt = parser->patched_cb->kernel_address +
5274
cb_patched_cur_length;
5275
5276
pkt_id = (enum packet_id) (
5277
(le64_to_cpu(user_pkt->header) &
5278
PACKET_HEADER_PACKET_ID_MASK) >>
5279
PACKET_HEADER_PACKET_ID_SHIFT);
5280
5281
if (!validate_packet_id(pkt_id)) {
5282
dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5283
rc = -EINVAL;
5284
break;
5285
}
5286
5287
pkt_size = gaudi_packet_sizes[pkt_id];
5288
cb_parsed_length += pkt_size;
5289
if (cb_parsed_length > parser->user_cb_size) {
5290
dev_err(hdev->dev,
5291
"packet 0x%x is out of CB boundary\n", pkt_id);
5292
rc = -EINVAL;
5293
break;
5294
}
5295
5296
switch (pkt_id) {
5297
case PACKET_LIN_DMA:
5298
rc = gaudi_patch_dma_packet(hdev, parser,
5299
(struct packet_lin_dma *) user_pkt,
5300
(struct packet_lin_dma *) kernel_pkt,
5301
&new_pkt_size);
5302
cb_patched_cur_length += new_pkt_size;
5303
break;
5304
5305
case PACKET_MSG_PROT:
5306
dev_err(hdev->dev,
5307
"User not allowed to use MSG_PROT\n");
5308
rc = -EPERM;
5309
break;
5310
5311
case PACKET_CP_DMA:
5312
dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5313
rc = -EPERM;
5314
break;
5315
5316
case PACKET_STOP:
5317
dev_err(hdev->dev, "User not allowed to use STOP\n");
5318
rc = -EPERM;
5319
break;
5320
5321
case PACKET_WREG_32:
5322
case PACKET_WREG_BULK:
5323
case PACKET_MSG_LONG:
5324
case PACKET_MSG_SHORT:
5325
case PACKET_REPEAT:
5326
case PACKET_FENCE:
5327
case PACKET_NOP:
5328
case PACKET_ARB_POINT:
5329
case PACKET_LOAD_AND_EXE:
5330
memcpy(kernel_pkt, user_pkt, pkt_size);
5331
cb_patched_cur_length += pkt_size;
5332
break;
5333
5334
default:
5335
dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5336
pkt_id);
5337
rc = -EINVAL;
5338
break;
5339
}
5340
5341
if (rc)
5342
break;
5343
}
5344
5345
return rc;
5346
}
5347
5348
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5349
struct hl_cs_parser *parser)
5350
{
5351
u64 handle;
5352
u32 patched_cb_size;
5353
struct hl_cb *user_cb;
5354
int rc;
5355
5356
/*
5357
* The new CB should have space at the end for two MSG_PROT packets, plus optional NOP padding:
5358
* 1. Optional NOP padding for cacheline alignment
5359
* 2. A packet that will act as a completion packet
5360
* 3. A packet that will generate an MSI interrupt
5361
*/
5362
if (parser->completion)
5363
parser->patched_cb_size = parser->user_cb_size +
5364
gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5365
else
5366
parser->patched_cb_size = parser->user_cb_size;
5367
5368
rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5369
parser->patched_cb_size, false, false,
5370
&handle);
5371
5372
if (rc) {
5373
dev_err(hdev->dev,
5374
"Failed to allocate patched CB for DMA CS %d\n",
5375
rc);
5376
return rc;
5377
}
5378
5379
parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5380
/* hl_cb_get should never fail */
5381
if (!parser->patched_cb) {
5382
dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5383
rc = -EFAULT;
5384
goto out;
5385
}
5386
5387
/*
5388
* We are protected from overflow because the check
5389
* "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5390
* in the common code. That check is done only if is_kernel_allocated_cb is true.
5391
*
5392
* There is no option to reach here without going through that check because:
5393
* 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5394
* an external queue.
5395
* 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5396
*/
5397
memcpy(parser->patched_cb->kernel_address,
5398
parser->user_cb->kernel_address,
5399
parser->user_cb_size);
5400
5401
patched_cb_size = parser->patched_cb_size;
5402
5403
/* Validate patched CB instead of user CB */
5404
user_cb = parser->user_cb;
5405
parser->user_cb = parser->patched_cb;
5406
rc = gaudi_validate_cb(hdev, parser, true);
5407
parser->user_cb = user_cb;
5408
5409
if (rc) {
5410
hl_cb_put(parser->patched_cb);
5411
goto out;
5412
}
5413
5414
if (patched_cb_size != parser->patched_cb_size) {
5415
dev_err(hdev->dev, "user CB size mismatch\n");
5416
hl_cb_put(parser->patched_cb);
5417
rc = -EINVAL;
5418
goto out;
5419
}
5420
5421
out:
5422
/*
5423
* Always call cb destroy here because we still have 1 reference
5424
* to it from the earlier cb_get call. After the job is completed,
5425
* cb_put will release it, but here we want to remove it from the
5426
* idr
5427
*/
5428
hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5429
5430
return rc;
5431
}
5432
5433
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5434
struct hl_cs_parser *parser)
5435
{
5436
u64 handle;
5437
int rc;
5438
5439
rc = gaudi_validate_cb(hdev, parser, false);
5440
5441
if (rc)
5442
goto free_userptr;
5443
5444
rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5445
parser->patched_cb_size, false, false,
5446
&handle);
5447
if (rc) {
5448
dev_err(hdev->dev,
5449
"Failed to allocate patched CB for DMA CS %d\n", rc);
5450
goto free_userptr;
5451
}
5452
5453
parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5454
/* hl_cb_get should never fail here */
5455
if (!parser->patched_cb) {
5456
dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5457
rc = -EFAULT;
5458
goto out;
5459
}
5460
5461
rc = gaudi_patch_cb(hdev, parser);
5462
5463
if (rc)
5464
hl_cb_put(parser->patched_cb);
5465
5466
out:
5467
/*
5468
* Always call cb destroy here because we still have 1 reference
5469
* to it from the earlier cb_get call. After the job is completed,
5470
* cb_put will release it, but here we want to remove it from the
5471
* idr
5472
*/
5473
hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5474
5475
free_userptr:
5476
if (rc)
5477
hl_userptr_delete_list(hdev, parser->job_userptr_list);
5478
return rc;
5479
}
5480
5481
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5482
struct hl_cs_parser *parser)
5483
{
5484
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5485
struct gaudi_device *gaudi = hdev->asic_specific;
5486
u32 nic_queue_offset, nic_mask_q_id;
5487
5488
if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5489
(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5490
nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5491
nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5492
5493
if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5494
dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5495
return -EINVAL;
5496
}
5497
}
5498
5499
/* For internal queue jobs just check if CB address is valid */
5500
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5501
parser->user_cb_size,
5502
asic_prop->sram_user_base_address,
5503
asic_prop->sram_end_address))
5504
return 0;
5505
5506
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5507
parser->user_cb_size,
5508
asic_prop->dram_user_base_address,
5509
asic_prop->dram_end_address))
5510
return 0;
5511
5512
/* PMMU and HPMMU addresses are equal, check only one of them */
5513
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5514
parser->user_cb_size,
5515
asic_prop->pmmu.start_addr,
5516
asic_prop->pmmu.end_addr))
5517
return 0;
5518
5519
dev_err(hdev->dev,
5520
"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5521
parser->user_cb, parser->user_cb_size);
5522
5523
return -EFAULT;
5524
}
5525
5526
static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5527
{
5528
struct gaudi_device *gaudi = hdev->asic_specific;
5529
5530
if (parser->queue_type == QUEUE_TYPE_INT)
5531
return gaudi_parse_cb_no_ext_queue(hdev, parser);
5532
5533
if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5534
return gaudi_parse_cb_mmu(hdev, parser);
5535
else
5536
return gaudi_parse_cb_no_mmu(hdev, parser);
5537
}
5538
5539
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5540
u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5541
u32 msi_vec, bool eb)
5542
{
5543
struct packet_msg_prot *cq_pkt;
5544
struct packet_nop *cq_padding;
5545
u64 msi_addr;
5546
u32 tmp;
5547
5548
cq_padding = kernel_address + original_len;
5549
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5550
5551
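/* Fill the gap up to the two trailing MSG_PROT packets with NOP packets */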
while ((void *)cq_padding < (void *)cq_pkt) {
5552
cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5553
cq_padding++;
5554
}
5555
5556
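/* First MSG_PROT packet: write the completion value to the CQ address */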
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5557
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5558
5559
if (eb)
5560
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5561
5562
cq_pkt->ctl = cpu_to_le32(tmp);
5563
cq_pkt->value = cpu_to_le32(cq_val);
5564
cq_pkt->addr = cpu_to_le64(cq_addr);
5565
5566
cq_pkt++;
5567
5568
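/* Second MSG_PROT packet: trigger the MSI by writing 1 to the MSI request register */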
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5569
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5570
cq_pkt->ctl = cpu_to_le32(tmp);
5571
cq_pkt->value = cpu_to_le32(1);
5572
msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5573
cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5574
}
5575
5576
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5577
{
5578
WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5579
}
5580
5581
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5582
u32 size, u64 val)
5583
{
5584
struct packet_lin_dma *lin_dma_pkt;
5585
struct hl_cs_job *job;
5586
u32 cb_size, ctl, err_cause;
5587
struct hl_cb *cb;
5588
int rc;
5589
5590
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5591
if (!cb)
5592
return -EFAULT;
5593
5594
lin_dma_pkt = cb->kernel_address;
5595
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5596
cb_size = sizeof(*lin_dma_pkt);
5597
5598
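/*
* Build a single LIN_DMA packet in memset mode: the 64-bit value carried in
* src_addr is replicated over 'size' bytes starting at 'addr'.
*/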
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5599
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5600
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5601
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5602
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5603
5604
lin_dma_pkt->ctl = cpu_to_le32(ctl);
5605
lin_dma_pkt->src_addr = cpu_to_le64(val);
5606
lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5607
lin_dma_pkt->tsize = cpu_to_le32(size);
5608
5609
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5610
if (!job) {
5611
dev_err(hdev->dev, "Failed to allocate a new job\n");
5612
rc = -ENOMEM;
5613
goto release_cb;
5614
}
5615
5616
/* Verify DMA is OK */
5617
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5618
if (err_cause && !hdev->init_done) {
5619
dev_dbg(hdev->dev,
5620
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5621
err_cause);
5622
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5623
}
5624
5625
job->id = 0;
5626
job->user_cb = cb;
5627
atomic_inc(&job->user_cb->cs_cnt);
5628
job->user_cb_size = cb_size;
5629
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5630
job->patched_cb = job->user_cb;
5631
job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5632
5633
hl_debugfs_add_job(hdev, job);
5634
5635
rc = gaudi_send_job_on_qman0(hdev, job);
5636
hl_debugfs_remove_job(hdev, job);
5637
kfree(job);
5638
atomic_dec(&cb->cs_cnt);
5639
5640
/* Verify DMA is OK */
5641
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5642
if (err_cause) {
5643
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5644
rc = -EIO;
5645
if (!hdev->init_done) {
5646
dev_dbg(hdev->dev,
5647
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5648
err_cause);
5649
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5650
}
5651
}
5652
5653
release_cb:
5654
hl_cb_put(cb);
5655
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5656
5657
return rc;
5658
}
5659
5660
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5661
u32 num_regs, u32 val)
5662
{
5663
struct packet_msg_long *pkt;
5664
struct hl_cs_job *job;
5665
u32 cb_size, ctl;
5666
struct hl_cb *cb;
5667
int i, rc;
5668
5669
cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5670
5671
if (cb_size > SZ_2M) {
5672
dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5673
return -ENOMEM;
5674
}
5675
5676
cb = hl_cb_kernel_create(hdev, cb_size, false);
5677
if (!cb)
5678
return -EFAULT;
5679
5680
pkt = cb->kernel_address;
5681
5682
ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5683
ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5684
ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5685
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5686
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5687
5688
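/* One MSG_LONG packet per register: write 'val' to num_regs consecutive 32-bit registers */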
for (i = 0; i < num_regs ; i++, pkt++) {
5689
pkt->ctl = cpu_to_le32(ctl);
5690
pkt->value = cpu_to_le32(val);
5691
pkt->addr = cpu_to_le64(reg_base + (i * 4));
5692
}
5693
5694
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5695
if (!job) {
5696
dev_err(hdev->dev, "Failed to allocate a new job\n");
5697
rc = -ENOMEM;
5698
goto release_cb;
5699
}
5700
5701
job->id = 0;
5702
job->user_cb = cb;
5703
atomic_inc(&job->user_cb->cs_cnt);
5704
job->user_cb_size = cb_size;
5705
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5706
job->patched_cb = job->user_cb;
5707
job->job_cb_size = cb_size;
5708
5709
hl_debugfs_add_job(hdev, job);
5710
5711
rc = gaudi_send_job_on_qman0(hdev, job);
5712
hl_debugfs_remove_job(hdev, job);
5713
kfree(job);
5714
atomic_dec(&cb->cs_cnt);
5715
5716
release_cb:
5717
hl_cb_put(cb);
5718
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5719
5720
return rc;
5721
}
5722
5723
static int gaudi_restore_sm_registers(struct hl_device *hdev)
5724
{
5725
u64 base_addr;
5726
u32 num_regs;
5727
int rc;
5728
5729
base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5730
num_regs = NUM_OF_SOB_IN_BLOCK;
5731
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5732
if (rc) {
5733
dev_err(hdev->dev, "failed resetting SM registers");
5734
return -ENOMEM;
5735
}
5736
5737
base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5738
num_regs = NUM_OF_SOB_IN_BLOCK;
5739
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5740
if (rc) {
5741
dev_err(hdev->dev, "failed resetting SM registers");
5742
return -ENOMEM;
5743
}
5744
5745
base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5746
num_regs = NUM_OF_SOB_IN_BLOCK;
5747
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5748
if (rc) {
5749
dev_err(hdev->dev, "failed resetting SM registers");
5750
return -ENOMEM;
5751
}
5752
5753
base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5754
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5755
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5756
if (rc) {
5757
dev_err(hdev->dev, "failed resetting SM registers");
5758
return -ENOMEM;
5759
}
5760
5761
base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5762
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5763
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5764
if (rc) {
5765
dev_err(hdev->dev, "failed resetting SM registers");
5766
return -ENOMEM;
5767
}
5768
5769
base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5770
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5771
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5772
if (rc) {
5773
dev_err(hdev->dev, "failed resetting SM registers");
5774
return -ENOMEM;
5775
}
5776
5777
base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5778
(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5779
num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5780
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5781
if (rc) {
5782
dev_err(hdev->dev, "failed resetting SM registers");
5783
return -ENOMEM;
5784
}
5785
5786
base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5787
(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5788
num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5789
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5790
if (rc) {
5791
dev_err(hdev->dev, "failed resetting SM registers");
5792
return -ENOMEM;
5793
}
5794
5795
return 0;
5796
}
5797
5798
static void gaudi_restore_dma_registers(struct hl_device *hdev)
5799
{
5800
u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5801
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5802
int i;
5803
5804
for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5805
u64 sob_addr = CFG_BASE +
5806
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5807
(i * sob_delta);
5808
u32 dma_offset = i * DMA_CORE_OFFSET;
5809
5810
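/* Re-point the DMA core's write-completion address at this channel's sync object */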
WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5811
lower_32_bits(sob_addr));
5812
WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5813
upper_32_bits(sob_addr));
5814
WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5815
5816
/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5817
* modified by the user for SRAM reduction
5818
*/
5819
if (i > 1)
5820
WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5821
0x00000001);
5822
}
5823
}
5824
5825
static void gaudi_restore_qm_registers(struct hl_device *hdev)
5826
{
5827
u32 qman_offset;
5828
int i;
5829
5830
for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5831
qman_offset = i * DMA_QMAN_OFFSET;
5832
WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5833
}
5834
5835
for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5836
qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5837
WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5838
}
5839
5840
for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5841
qman_offset = i * TPC_QMAN_OFFSET;
5842
WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5843
}
5844
5845
for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5846
qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5847
(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5848
WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5849
}
5850
}
5851
5852
static int gaudi_restore_user_registers(struct hl_device *hdev)
5853
{
5854
int rc;
5855
5856
rc = gaudi_restore_sm_registers(hdev);
5857
if (rc)
5858
return rc;
5859
5860
gaudi_restore_dma_registers(hdev);
5861
gaudi_restore_qm_registers(hdev);
5862
5863
return 0;
5864
}
5865
5866
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5867
{
5868
return 0;
5869
}
5870
5871
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5872
{
5873
u32 size = hdev->asic_prop.mmu_pgt_size +
5874
hdev->asic_prop.mmu_cache_mng_size;
5875
struct gaudi_device *gaudi = hdev->asic_specific;
5876
u64 addr = hdev->asic_prop.mmu_pgt_addr;
5877
5878
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5879
return 0;
5880
5881
return gaudi_memset_device_memory(hdev, addr, size, 0);
5882
}
5883
5884
static void gaudi_restore_phase_topology(struct hl_device *hdev)
5885
{
5886
5887
}
5888
5889
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5890
u32 size_to_dma, dma_addr_t dma_addr)
5891
{
5892
u32 err_cause, val;
5893
u64 dma_offset;
5894
int rc;
5895
5896
dma_offset = dma_id * DMA_CORE_OFFSET;
5897
5898
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5899
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5900
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5901
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5902
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5903
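/* Kick a linear transfer and poll below until the DMA core is no longer busy */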
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5904
(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5905
5906
rc = hl_poll_timeout(
5907
hdev,
5908
mmDMA0_CORE_STS0 + dma_offset,
5909
val,
5910
((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5911
0,
5912
1000000);
5913
5914
if (rc) {
5915
dev_err(hdev->dev,
5916
"DMA %d timed-out during reading of 0x%llx\n",
5917
dma_id, addr);
5918
return -EIO;
5919
}
5920
5921
/* Verify DMA is OK */
5922
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5923
if (err_cause) {
5924
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5925
dev_dbg(hdev->dev,
5926
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5927
err_cause);
5928
WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5929
5930
return -EIO;
5931
}
5932
5933
return 0;
5934
}
5935
5936
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5937
void *blob_addr)
5938
{
5939
u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5940
u32 qm_glbl_sts0, qm_cgm_sts;
5941
u64 dma_offset, qm_offset;
5942
dma_addr_t dma_addr;
5943
void *kernel_addr;
5944
bool is_eng_idle;
5945
int rc = 0, dma_id;
5946
5947
kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5948
5949
if (!kernel_addr)
5950
return -ENOMEM;
5951
5952
hdev->asic_funcs->hw_queues_lock(hdev);
5953
5954
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5955
dma_offset = dma_id * DMA_CORE_OFFSET;
5956
qm_offset = dma_id * DMA_QMAN_OFFSET;
5957
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5958
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5959
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5960
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5961
IS_DMA_IDLE(dma_core_sts0);
5962
5963
if (!is_eng_idle) {
5964
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5965
dma_offset = dma_id * DMA_CORE_OFFSET;
5966
qm_offset = dma_id * DMA_QMAN_OFFSET;
5967
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5968
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5969
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5970
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5971
IS_DMA_IDLE(dma_core_sts0);
5972
5973
if (!is_eng_idle) {
5974
dev_err_ratelimited(hdev->dev,
5975
"Can't read via DMA because it is BUSY\n");
5976
rc = -EAGAIN;
5977
goto out;
5978
}
5979
}
5980
5981
cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5982
WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5983
0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5984
5985
/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5986
* using the compute ctx ASID, if it exists. If not, use the kernel ctx
5987
* ASID
5988
*/
5989
WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5990
5991
/* Verify DMA is OK */
5992
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5993
if (err_cause) {
5994
dev_dbg(hdev->dev,
5995
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5996
err_cause);
5997
WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5998
}
5999
6000
pos = 0;
6001
size_left = size;
6002
size_to_dma = SZ_2M;
6003
6004
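/* Copy the region in chunks of up to 2MB through the intermediate DMA buffer */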
while (size_left > 0) {
6005
6006
if (size_left < SZ_2M)
6007
size_to_dma = size_left;
6008
6009
rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6010
dma_addr);
6011
if (rc)
6012
break;
6013
6014
memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6015
6016
if (size_left <= SZ_2M)
6017
break;
6018
6019
pos += SZ_2M;
6020
addr += SZ_2M;
6021
size_left -= SZ_2M;
6022
}
6023
6024
/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6025
* using the compute ctx ASID, if it exists. If not, use the kernel ctx
6026
* ASID
6027
*/
6028
WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6029
~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6030
6031
WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6032
6033
out:
6034
hdev->asic_funcs->hw_queues_unlock(hdev);
6035
6036
hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6037
6038
return rc;
6039
}
6040
6041
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6042
{
6043
struct gaudi_device *gaudi = hdev->asic_specific;
6044
6045
if (hdev->reset_info.hard_reset_pending)
6046
return U64_MAX;
6047
6048
return readq(hdev->pcie_bar[HBM_BAR_ID] +
6049
(addr - gaudi->hbm_bar_cur_addr));
6050
}
6051
6052
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6053
{
6054
struct gaudi_device *gaudi = hdev->asic_specific;
6055
6056
if (hdev->reset_info.hard_reset_pending)
6057
return;
6058
6059
writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6060
(addr - gaudi->hbm_bar_cur_addr));
6061
}
6062
6063
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6064
{
6065
/* mask to zero the MMBP and ASID bits */
6066
WREG32_AND(reg, ~0x7FF);
6067
WREG32_OR(reg, asid);
6068
}
6069
6070
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6071
{
6072
struct gaudi_device *gaudi = hdev->asic_specific;
6073
6074
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6075
return;
6076
6077
if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6078
dev_crit(hdev->dev, "asid %u is too big\n", asid);
6079
return;
6080
}
6081
6082
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6083
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6084
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6085
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6086
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6087
6088
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6089
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6090
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6091
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6092
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6093
6094
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6095
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6096
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6097
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6098
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6099
6100
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6101
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6102
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6103
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6104
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6105
6106
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6107
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6108
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6109
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6110
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6111
6112
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6113
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6114
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6115
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6116
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6117
6118
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6119
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6120
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6121
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6122
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6123
6124
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6125
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6126
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6127
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6128
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6129
6130
gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6131
gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6132
gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6133
gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6134
gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6135
gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6136
gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6137
gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6138
6139
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6140
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6141
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6142
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6143
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6144
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6145
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6146
6147
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6148
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6149
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6150
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6151
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6152
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6153
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6154
6155
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6156
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6157
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6158
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6159
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6160
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6161
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6162
6163
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6164
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6165
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6166
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6167
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6168
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6169
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6170
6171
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6172
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6173
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6174
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6175
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6176
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6177
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6178
6179
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6180
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6181
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6182
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6183
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6184
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6185
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6186
6187
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6188
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6189
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6190
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6191
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6192
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6193
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6194
6195
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6196
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6197
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6198
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6199
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6200
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6201
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6202
6203
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6204
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6205
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6206
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6207
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6208
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6209
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6210
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6211
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6212
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6213
6214
gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6215
gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6216
gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6217
gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6218
gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6219
gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6220
gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6221
gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6222
gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6223
gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6224
gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6225
gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6226
6227
if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6228
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6229
asid);
6230
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6231
asid);
6232
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6233
asid);
6234
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6235
asid);
6236
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6237
asid);
6238
}
6239
6240
if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6241
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6242
asid);
6243
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6244
asid);
6245
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6246
asid);
6247
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6248
asid);
6249
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6250
asid);
6251
}
6252
6253
if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6254
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6255
asid);
6256
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6257
asid);
6258
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6259
asid);
6260
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6261
asid);
6262
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6263
asid);
6264
}
6265
6266
if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6267
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6268
asid);
6269
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6270
asid);
6271
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6272
asid);
6273
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6274
asid);
6275
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6276
asid);
6277
}
6278
6279
if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6280
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6281
asid);
6282
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6283
asid);
6284
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6285
asid);
6286
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6287
asid);
6288
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6289
asid);
6290
}
6291
6292
if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6293
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6294
asid);
6295
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6296
asid);
6297
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6298
asid);
6299
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6300
asid);
6301
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6302
asid);
6303
}
6304
6305
if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6306
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6307
asid);
6308
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6309
asid);
6310
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6311
asid);
6312
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6313
asid);
6314
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6315
asid);
6316
}
6317
6318
if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6319
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6320
asid);
6321
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6322
asid);
6323
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6324
asid);
6325
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6326
asid);
6327
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6328
asid);
6329
}
6330
6331
if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6332
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6333
asid);
6334
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6335
asid);
6336
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6337
asid);
6338
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6339
asid);
6340
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6341
asid);
6342
}
6343
6344
if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6345
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6346
asid);
6347
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6348
asid);
6349
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6350
asid);
6351
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6352
asid);
6353
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6354
asid);
6355
}
6356
6357
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6358
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6359
}
6360
6361
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6362
struct hl_cs_job *job)
6363
{
6364
struct packet_msg_prot *fence_pkt;
6365
u32 *fence_ptr;
6366
dma_addr_t fence_dma_addr;
6367
struct hl_cb *cb;
6368
u32 tmp, timeout, dma_offset;
6369
int rc;
6370
6371
if (hdev->pldm)
6372
timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6373
else
6374
timeout = HL_DEVICE_TIMEOUT_USEC;
6375
6376
fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6377
if (!fence_ptr) {
6378
dev_err(hdev->dev,
6379
"Failed to allocate fence memory for QMAN0\n");
6380
return -ENOMEM;
6381
}
6382
6383
cb = job->patched_cb;
6384
6385
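/*
* The patched CB ends with a MSG_PROT packet that writes a known fence value
* to host memory; job completion is detected by polling that value below.
*/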
fence_pkt = cb->kernel_address +
6386
job->job_cb_size - sizeof(struct packet_msg_prot);
6387
6388
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6389
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6390
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6391
6392
fence_pkt->ctl = cpu_to_le32(tmp);
6393
fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6394
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6395
6396
dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6397
6398
WREG32(mmDMA0_CORE_PROT + dma_offset,
6399
BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6400
6401
rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6402
job->job_cb_size, cb->bus_address);
6403
if (rc) {
6404
dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6405
goto free_fence_ptr;
6406
}
6407
6408
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6409
(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6410
timeout, true);
6411
6412
hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6413
6414
if (rc == -ETIMEDOUT) {
6415
dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6416
goto free_fence_ptr;
6417
}
6418
6419
free_fence_ptr:
6420
WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6421
6422
hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6423
return rc;
6424
}
6425
6426
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6427
{
6428
if (event_type >= GAUDI_EVENT_SIZE)
6429
goto event_not_supported;
6430
6431
if (!gaudi_irq_map_table[event_type].valid)
6432
goto event_not_supported;
6433
6434
snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6435
6436
return;
6437
6438
event_not_supported:
6439
snprintf(desc, size, "N/A");
6440
}
6441
6442
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6443
bool is_write, u16 *engine_id_1,
6444
u16 *engine_id_2)
6445
{
6446
u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6447
6448
mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6449
DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6450
6451
switch (x_y) {
6452
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6453
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6454
dma_id[0] = 0;
6455
dma_id[1] = 2;
6456
break;
6457
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6458
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6459
dma_id[0] = 1;
6460
dma_id[1] = 3;
6461
break;
6462
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6463
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6464
dma_id[0] = 4;
6465
dma_id[1] = 6;
6466
break;
6467
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6468
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6469
dma_id[0] = 5;
6470
dma_id[1] = 7;
6471
break;
6472
default:
6473
goto unknown_initiator;
6474
}
6475
6476
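/*
* Each of these locations is shared by two DMA engines; sample both engines'
* error-cause registers to identify which one issued the bad transaction,
* or report both when it cannot be determined.
*/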
for (i = 0 ; i < 2 ; i++) {
6477
dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6478
err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6479
}
6480
6481
switch (x_y) {
6482
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6483
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6484
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6485
*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6486
return "DMA0";
6487
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6488
*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6489
return "DMA2";
6490
} else {
6491
*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6492
*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6493
return "DMA0 or DMA2";
6494
}
6495
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6496
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6497
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6498
*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6499
return "DMA1";
6500
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6501
*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6502
return "DMA3";
6503
} else {
6504
*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6505
*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6506
return "DMA1 or DMA3";
6507
}
6508
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6509
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6510
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6511
*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6512
return "DMA4";
6513
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6514
*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6515
return "DMA6";
6516
} else {
6517
*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6518
*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6519
return "DMA4 or DMA6";
6520
}
6521
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6522
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6523
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6524
*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6525
return "DMA5";
6526
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6527
*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6528
return "DMA7";
6529
} else {
6530
*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6531
*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6532
return "DMA5 or DMA7";
6533
}
6534
}
6535
6536
unknown_initiator:
6537
return "unknown initiator";
6538
}
6539
6540
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6541
u16 *engine_id_1, u16 *engine_id_2)
6542
{
6543
u32 val, x_y, axi_id;
6544
6545
val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6546
RREG32(mmMMU_UP_RAZWI_READ_ID);
6547
x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6548
(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6549
axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6550
RAZWI_INITIATOR_AXI_ID_SHIFT);
6551
6552
switch (x_y) {
6553
case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6554
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6555
*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6556
return "TPC0";
6557
}
6558
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6559
*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6560
return "NIC0";
6561
}
6562
break;
6563
case RAZWI_INITIATOR_ID_X_Y_TPC1:
6564
*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6565
return "TPC1";
6566
case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6567
case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6568
*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6569
return "MME0";
6570
case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6571
case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6572
*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6573
return "MME1";
6574
case RAZWI_INITIATOR_ID_X_Y_TPC2:
6575
*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6576
return "TPC2";
6577
case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6578
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6579
*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6580
return "TPC3";
6581
}
6582
/* PCI, CPU and PSOC do not have an engine id */
6583
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6584
return "PCI";
6585
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6586
return "CPU";
6587
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6588
return "PSOC";
6589
break;
6590
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6591
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6592
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6593
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6594
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6595
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6596
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6597
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6598
return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6599
engine_id_1, engine_id_2);
6600
case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6601
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6602
*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6603
return "TPC4";
6604
}
6605
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6606
*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6607
return "NIC1";
6608
}
6609
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6610
*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6611
return "NIC2";
6612
}
6613
break;
6614
case RAZWI_INITIATOR_ID_X_Y_TPC5:
6615
*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6616
return "TPC5";
6617
case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6618
case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6619
*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6620
return "MME2";
6621
case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6622
case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6623
*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6624
return "MME3";
6625
case RAZWI_INITIATOR_ID_X_Y_TPC6:
6626
*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6627
return "TPC6";
6628
case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6629
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6630
*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6631
return "TPC7";
6632
}
6633
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6634
*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6635
return "NIC4";
6636
}
6637
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6638
*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6639
return "NIC5";
6640
}
6641
break;
6642
default:
6643
break;
6644
}
6645
6646
dev_err(hdev->dev,
6647
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6648
val,
6649
(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6650
(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6651
(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6652
RAZWI_INITIATOR_AXI_ID_MASK);
6653
6654
return "unknown initiator";
6655
}
6656
6657
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6658
u16 *engine_id_2, bool *is_read, bool *is_write)
6659
{
6660
6661
if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6662
dev_err_ratelimited(hdev->dev,
6663
"RAZWI event caused by illegal write of %s\n",
6664
gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6665
WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6666
*is_write = true;
6667
}
6668
6669
if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6670
dev_err_ratelimited(hdev->dev,
6671
"RAZWI event caused by illegal read of %s\n",
6672
gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6673
WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6674
*is_read = true;
6675
}
6676
}
6677
6678
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6679
{
6680
struct gaudi_device *gaudi = hdev->asic_specific;
6681
u32 val;
6682
6683
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6684
return;
6685
6686
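/* The capture register holds VA bits 49:32; the low 32 bits are read from the companion VA register */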
val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6687
if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6688
*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6689
*addr <<= 32;
6690
*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6691
6692
dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6693
hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6694
6695
WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6696
}
6697
6698
val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6699
if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6700
*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6701
*addr <<= 32;
6702
*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6703
6704
dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6705
6706
WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6707
}
6708
}
6709
6710
/*
* +-------------------+------------------------------------------------------+
* | Configuration Reg |                     Description                      |
* |      Address      |                                                      |
* +-------------------+------------------------------------------------------+
* |   0xF30 - 0xF3F   |ECC single error indication (1 bit per memory wrapper)|
* |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
* |                   |0xF34 memory wrappers 63:32                           |
* |                   |0xF38 memory wrappers 95:64                           |
* |                   |0xF3C memory wrappers 127:96                          |
* +-------------------+------------------------------------------------------+
* |   0xF40 - 0xF4F   |ECC double error indication (1 bit per memory wrapper)|
* |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
* |                   |0xF44 memory wrappers 63:32                           |
* |                   |0xF48 memory wrappers 95:64                           |
* |                   |0xF4C memory wrappers 127:96                          |
* +-------------------+------------------------------------------------------+
*/
6728
static int gaudi_extract_ecc_info(struct hl_device *hdev,
6729
struct ecc_info_extract_params *params, u64 *ecc_address,
6730
u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6731
{
6732
u32 i, num_mem_regs, reg, err_bit;
6733
u64 err_addr, err_word = 0;
6734
6735
num_mem_regs = params->num_memories / 32 +
6736
((params->num_memories % 32) ? 1 : 0);
6737
6738
if (params->block_address >= CFG_BASE)
6739
params->block_address -= CFG_BASE;
6740
6741
if (params->derr)
6742
err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6743
else
6744
err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6745
6746
/* Set invalid wrapper index */
6747
*memory_wrapper_idx = 0xFF;
6748
6749
/* Iterate through memory wrappers, a single bit must be set */
6750
for (i = 0 ; i < num_mem_regs ; i++) {
6751
err_addr += i * 4;
6752
err_word = RREG32(err_addr);
6753
if (err_word) {
6754
err_bit = __ffs(err_word);
6755
*memory_wrapper_idx = err_bit + (32 * i);
6756
break;
6757
}
6758
}
6759
6760
if (*memory_wrapper_idx == 0xFF) {
6761
dev_err(hdev->dev, "ECC error information cannot be found\n");
6762
return -EINVAL;
6763
}
6764
6765
WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6766
*memory_wrapper_idx);
6767
6768
*ecc_address =
6769
RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6770
*ecc_syndrom =
6771
RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6772
6773
/* Clear error indication */
6774
reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6775
if (params->derr)
6776
reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6777
else
6778
reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6779
6780
WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6781
6782
return 0;
6783
}
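/*
* Worked example (num_memories = 90 is the value used for the TPC SERR/DERR
* cases further below): 90 / 32 = 2 with a remainder of 26, so num_mem_regs = 3
* and three 32-bit error registers are scanned. If, say, bit 5 of the second
* register (i == 1) is the only bit set, then
*
*   *memory_wrapper_idx = __ffs(err_word) + (32 * i) = 5 + 32 = 37
*
* and that wrapper is selected through GAUDI_ECC_MEM_SEL_OFFSET before the
* address and syndrome are read.
*/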
/*
6786
* gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6787
*
6788
* @idx: the current pi/ci value
6789
* @q_len: the queue length (power of 2)
6790
*
6791
* @return the cyclically decremented index
6792
*/
6793
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6794
{
6795
u32 mask = q_len - 1;
6796
6797
/*
* A modular decrement is equivalent to adding (q_len - 1);
* we then take the LSBs to keep the value in the
* range [0, q_len - 1].
*/
6802
return (idx + q_len - 1) & mask;
6803
}
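/*
* Worked example (illustrative only): for a queue of length 8 the mask is 0x7,
* so the decrement wraps cleanly around zero:
*
*   gaudi_queue_idx_dec(5, 8) == (5 + 8 - 1) & 0x7 == 4
*   gaudi_queue_idx_dec(0, 8) == (0 + 8 - 1) & 0x7 == 7
*/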
/**
6806
* gaudi_handle_sw_config_stream_data - print SW config stream data
6807
*
6808
* @hdev: pointer to the habanalabs device structure
6809
* @stream: the QMAN's stream
6810
* @qman_base: base address of QMAN registers block
6811
* @event_mask: mask of the last events that occurred
6812
*/
6813
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6814
u64 qman_base, u64 event_mask)
6815
{
6816
u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6817
u32 cq_ptr_lo_off, size;
6818
6819
cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6820
6821
cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6822
stream * cq_ptr_lo_off;
6823
cq_ptr_hi = cq_ptr_lo +
6824
(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6825
cq_tsize = cq_ptr_lo +
6826
(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6827
6828
cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6829
size = RREG32(cq_tsize);
6830
dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6831
stream, cq_ptr, size);
6832
6833
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6834
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6835
hdev->captured_err_info.undef_opcode.cq_size = size;
6836
hdev->captured_err_info.undef_opcode.stream_id = stream;
6837
}
6838
}
6839
6840
/**
6841
* gaudi_handle_last_pqes_on_err - print last PQEs on error
6842
*
6843
* @hdev: pointer to the habanalabs device structure
6844
* @qid_base: first QID of the QMAN (out of 4 streams)
6845
* @stream: the QMAN's stream
6846
* @qman_base: base address of QMAN registers block
6847
* @event_mask: mask of the last events that occurred
6848
* @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6849
*/
6850
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6851
u32 stream, u64 qman_base,
6852
u64 event_mask,
6853
bool pr_sw_conf)
6854
{
6855
u32 ci, qm_ci_stream_off, queue_len;
6856
struct hl_hw_queue *q;
6857
u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6858
int i;
6859
6860
q = &hdev->kernel_queues[qid_base + stream];
6861
6862
qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6863
pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6864
stream * qm_ci_stream_off;
6865
6866
queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6867
q->int_queue_len : HL_QUEUE_LENGTH;
6868
6869
hdev->asic_funcs->hw_queues_lock(hdev);
6870
6871
if (pr_sw_conf)
6872
gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6873
6874
ci = RREG32(pq_ci);
6875
6876
/* we should start printing from ci - 1 */
6877
ci = gaudi_queue_idx_dec(ci, queue_len);
6878
memset(addr, 0, sizeof(addr));
6879
6880
for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6881
struct hl_bd *bd;
6882
u32 len;
6883
6884
bd = q->kernel_address;
6885
bd += ci;
6886
6887
len = le32_to_cpu(bd->len);
6888
/* len 0 means an uninitialized entry - break */
6889
if (!len)
6890
break;
6891
6892
addr[i] = le64_to_cpu(bd->ptr);
6893
6894
dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6895
stream, ci, addr[i], len);
6896
6897
/* get previous ci, wrap if needed */
6898
ci = gaudi_queue_idx_dec(ci, queue_len);
6899
}
6900
6901
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6902
struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6903
u32 arr_idx = undef_opcode->cb_addr_streams_len;
6904
6905
if (arr_idx == 0) {
6906
undef_opcode->timestamp = ktime_get();
6907
undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6908
}
6909
6910
memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6911
undef_opcode->cb_addr_streams_len++;
6912
}
6913
6914
hdev->asic_funcs->hw_queues_unlock(hdev);
6915
}
6916
6917
/**
6918
* handle_qman_data_on_err - extract QMAN data on error
6919
*
6920
* @hdev: pointer to the habanalabs device structure
6921
* @qid_base: first QID of the QMAN (out of 4 streams)
6922
* @stream: the QMAN's stream
6923
* @qman_base: base address of QMAN registers block
6924
* @event_mask: mask of the last events that occurred
*
* This function attempts to extract as much data as possible on a QMAN error.
* On the upper CP, print the SW config stream data and the last 8 PQEs.
* On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
*/
6930
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6931
u32 stream, u64 qman_base, u64 event_mask)
6932
{
6933
u32 i;
6934
6935
if (stream != QMAN_STREAMS) {
6936
gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6937
qman_base, event_mask, true);
6938
return;
6939
}
6940
6941
/* handle Lower-CP */
6942
gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6943
6944
for (i = 0; i < QMAN_STREAMS; i++)
6945
gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6946
qman_base, event_mask, false);
6947
}
6948
6949
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6950
const char *qm_name,
6951
u64 qman_base,
6952
u32 qid_base,
6953
u64 *event_mask)
6954
{
6955
u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6956
u64 glbl_sts_addr, arb_err_addr;
6957
char reg_desc[32];
6958
6959
glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6960
arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6961
6962
/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6963
for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6964
glbl_sts_clr_val = 0;
6965
glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6966
6967
if (!glbl_sts_val)
6968
continue;
6969
6970
if (i == QMAN_STREAMS)
6971
snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6972
else
6973
snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6974
6975
for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6976
if (glbl_sts_val & BIT(j)) {
6977
dev_err_ratelimited(hdev->dev,
6978
"%s %s. err cause: %s\n",
6979
qm_name, reg_desc,
6980
gaudi_qman_error_cause[j]);
6981
glbl_sts_clr_val |= BIT(j);
6982
}
6983
}
6984
/* check for undefined opcode */
6985
if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6986
hdev->captured_err_info.undef_opcode.write_enable) {
6987
memset(&hdev->captured_err_info.undef_opcode, 0,
6988
sizeof(hdev->captured_err_info.undef_opcode));
6989
6990
hdev->captured_err_info.undef_opcode.write_enable = false;
6991
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6992
}
6993
6994
/* Write 1 to clear errors */
6995
if (!hdev->stop_on_err)
6996
WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6997
else
6998
handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6999
}
7000
7001
arb_err_val = RREG32(arb_err_addr);
7002
7003
if (!arb_err_val)
7004
return;
7005
7006
for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7007
if (arb_err_val & BIT(j)) {
7008
dev_err_ratelimited(hdev->dev,
7009
"%s ARB_ERR. err cause: %s\n",
7010
qm_name,
7011
gaudi_qman_arb_error_cause[j]);
7012
}
7013
}
7014
}
7015
7016
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7017
struct hl_eq_sm_sei_data *sei_data)
7018
{
7019
u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7020
7021
/* Flip the bits as the enum is ordered in the opposite way */
7022
index = (index ^ 0x3) & 0x3;
7023
7024
switch (sei_data->sei_cause) {
7025
case SM_SEI_SO_OVERFLOW:
7026
dev_err_ratelimited(hdev->dev,
7027
"%s SEI Error: SOB Group %u overflow/underflow",
7028
gaudi_sync_manager_names[index],
7029
le32_to_cpu(sei_data->sei_log));
7030
break;
7031
case SM_SEI_LBW_4B_UNALIGNED:
7032
dev_err_ratelimited(hdev->dev,
7033
"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7034
gaudi_sync_manager_names[index],
7035
le32_to_cpu(sei_data->sei_log));
7036
break;
7037
case SM_SEI_AXI_RESPONSE_ERR:
7038
dev_err_ratelimited(hdev->dev,
7039
"%s SEI Error: AXI ID %u response error",
7040
gaudi_sync_manager_names[index],
7041
le32_to_cpu(sei_data->sei_log));
7042
break;
7043
default:
7044
dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7045
le32_to_cpu(sei_data->sei_log));
7046
break;
7047
}
7048
}
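/*
* Note on the index flip above: (index ^ 0x3) & 0x3 simply reverses the order
* of the four DMA_IF SEI indices, i.e. 0 -> 3, 1 -> 2, 2 -> 1 and 3 -> 0, so
* the matching gaudi_sync_manager_names[] entry is printed.
*/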
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7051
struct hl_eq_ecc_data *ecc_data)
7052
{
7053
struct ecc_info_extract_params params;
7054
u64 ecc_address = 0, ecc_syndrom = 0;
7055
u8 index, memory_wrapper_idx = 0;
7056
bool extract_info_from_fw;
7057
int rc;
7058
7059
if (hdev->asic_prop.fw_security_enabled) {
7060
extract_info_from_fw = true;
7061
goto extract_ecc_info;
7062
}
7063
7064
switch (event_type) {
7065
case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7066
case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7067
extract_info_from_fw = true;
7068
break;
7069
case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7070
index = event_type - GAUDI_EVENT_TPC0_SERR;
7071
params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7072
params.num_memories = 90;
7073
params.derr = false;
7074
extract_info_from_fw = false;
7075
break;
7076
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7077
index = event_type - GAUDI_EVENT_TPC0_DERR;
7078
params.block_address =
7079
mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7080
params.num_memories = 90;
7081
params.derr = true;
7082
extract_info_from_fw = false;
7083
break;
7084
case GAUDI_EVENT_MME0_ACC_SERR:
7085
case GAUDI_EVENT_MME1_ACC_SERR:
7086
case GAUDI_EVENT_MME2_ACC_SERR:
7087
case GAUDI_EVENT_MME3_ACC_SERR:
7088
index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7089
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7090
params.num_memories = 128;
7091
params.derr = false;
7092
extract_info_from_fw = false;
7093
break;
7094
case GAUDI_EVENT_MME0_ACC_DERR:
7095
case GAUDI_EVENT_MME1_ACC_DERR:
7096
case GAUDI_EVENT_MME2_ACC_DERR:
7097
case GAUDI_EVENT_MME3_ACC_DERR:
7098
index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7099
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7100
params.num_memories = 128;
7101
params.derr = true;
7102
extract_info_from_fw = false;
7103
break;
7104
case GAUDI_EVENT_MME0_SBAB_SERR:
7105
case GAUDI_EVENT_MME1_SBAB_SERR:
7106
case GAUDI_EVENT_MME2_SBAB_SERR:
7107
case GAUDI_EVENT_MME3_SBAB_SERR:
7108
index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7109
params.block_address =
7110
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7111
params.num_memories = 33;
7112
params.derr = false;
7113
extract_info_from_fw = false;
7114
break;
7115
case GAUDI_EVENT_MME0_SBAB_DERR:
7116
case GAUDI_EVENT_MME1_SBAB_DERR:
7117
case GAUDI_EVENT_MME2_SBAB_DERR:
7118
case GAUDI_EVENT_MME3_SBAB_DERR:
7119
index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7120
params.block_address =
7121
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7122
params.num_memories = 33;
7123
params.derr = true;
7124
extract_info_from_fw = false;
7125
break;
7126
default:
7127
return;
7128
}
7129
7130
extract_ecc_info:
7131
if (extract_info_from_fw) {
7132
ecc_address = le64_to_cpu(ecc_data->ecc_address);
7133
ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7134
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7135
} else {
7136
rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7137
&ecc_syndrom, &memory_wrapper_idx);
7138
if (rc)
7139
return;
7140
}
7141
7142
dev_err(hdev->dev,
7143
"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7144
ecc_address, ecc_syndrom, memory_wrapper_idx);
7145
}
7146
7147
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7148
{
7149
u64 qman_base;
7150
char desc[32];
7151
u32 qid_base;
7152
u8 index;
7153
7154
switch (event_type) {
7155
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7156
index = event_type - GAUDI_EVENT_TPC0_QM;
7157
qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7158
qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7159
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7160
break;
7161
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7162
if (event_type == GAUDI_EVENT_MME0_QM) {
7163
index = 0;
7164
qid_base = GAUDI_QUEUE_ID_MME_0_0;
7165
} else { /* event_type == GAUDI_EVENT_MME2_QM */
7166
index = 2;
7167
qid_base = GAUDI_QUEUE_ID_MME_1_0;
7168
}
7169
qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7170
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7171
break;
7172
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7173
index = event_type - GAUDI_EVENT_DMA0_QM;
7174
qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7175
/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7176
if (index > 1)
7177
qid_base++;
7178
qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7179
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7180
break;
7181
case GAUDI_EVENT_NIC0_QM0:
7182
qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7183
qman_base = mmNIC0_QM0_BASE;
7184
snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7185
break;
7186
case GAUDI_EVENT_NIC0_QM1:
7187
qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7188
qman_base = mmNIC0_QM1_BASE;
7189
snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7190
break;
7191
case GAUDI_EVENT_NIC1_QM0:
7192
qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7193
qman_base = mmNIC1_QM0_BASE;
7194
snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7195
break;
7196
case GAUDI_EVENT_NIC1_QM1:
7197
qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7198
qman_base = mmNIC1_QM1_BASE;
7199
snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7200
break;
7201
case GAUDI_EVENT_NIC2_QM0:
7202
qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7203
qman_base = mmNIC2_QM0_BASE;
7204
snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7205
break;
7206
case GAUDI_EVENT_NIC2_QM1:
7207
qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7208
qman_base = mmNIC2_QM1_BASE;
7209
snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7210
break;
7211
case GAUDI_EVENT_NIC3_QM0:
7212
qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7213
qman_base = mmNIC3_QM0_BASE;
7214
snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7215
break;
7216
case GAUDI_EVENT_NIC3_QM1:
7217
qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7218
qman_base = mmNIC3_QM1_BASE;
7219
snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7220
break;
7221
case GAUDI_EVENT_NIC4_QM0:
7222
qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7223
qman_base = mmNIC4_QM0_BASE;
7224
snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7225
break;
7226
case GAUDI_EVENT_NIC4_QM1:
7227
qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7228
qman_base = mmNIC4_QM1_BASE;
7229
snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7230
break;
7231
default:
7232
return;
7233
}
7234
7235
gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7236
}
7237
7238
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7239
bool check_razwi, u64 *event_mask)
7240
{
7241
bool is_read = false, is_write = false;
7242
u16 engine_id[2], num_of_razwi_eng = 0;
7243
char desc[64] = "";
7244
u64 razwi_addr = 0;
7245
u8 razwi_flags = 0;
7246
7247
/*
* Initialize the engine ids as not valid by default; they only get a valid
* value if the RAZWI was initiated by an engine that has an engine id.
*/
7251
engine_id[0] = HL_RAZWI_NA_ENG_ID;
7252
engine_id[1] = HL_RAZWI_NA_ENG_ID;
7253
7254
gaudi_get_event_desc(event_type, desc, sizeof(desc));
7255
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7256
event_type, desc);
7257
7258
if (check_razwi) {
7259
gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7260
&is_write);
7261
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7262
7263
if (is_read)
7264
razwi_flags |= HL_RAZWI_READ;
7265
if (is_write)
7266
razwi_flags |= HL_RAZWI_WRITE;
7267
7268
if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7269
if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7270
num_of_razwi_eng = 2;
7271
else
7272
num_of_razwi_eng = 1;
7273
}
7274
7275
if (razwi_flags)
7276
hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7277
razwi_flags, event_mask);
7278
}
7279
}
7280
7281
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7282
struct cpucp_pkt_sync_err *sync_err)
7283
{
7284
struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7285
7286
dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7287
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7288
}
7289
7290
static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7291
struct hl_eq_fw_alive *fw_alive)
7292
{
7293
dev_err(hdev->dev,
7294
"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7295
(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7296
le32_to_cpu(fw_alive->process_id),
7297
le32_to_cpu(fw_alive->thread_id),
7298
le64_to_cpu(fw_alive->uptime_seconds));
7299
}
7300
7301
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7302
void *data)
7303
{
7304
char desc[64] = "", *type;
7305
struct eq_nic_sei_event *eq_nic_sei = data;
7306
u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7307
7308
switch (eq_nic_sei->axi_error_cause) {
7309
case RXB:
7310
type = "RXB";
7311
break;
7312
case RXE:
7313
type = "RXE";
7314
break;
7315
case TXS:
7316
type = "TXS";
7317
break;
7318
case TXE:
7319
type = "TXE";
7320
break;
7321
case QPC_RESP:
7322
type = "QPC_RESP";
7323
break;
7324
case NON_AXI_ERR:
7325
type = "NON_AXI_ERR";
7326
break;
7327
case TMR:
7328
type = "TMR";
7329
break;
7330
default:
7331
dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7332
eq_nic_sei->axi_error_cause);
7333
type = "N/A";
7334
break;
7335
}
7336
7337
snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7338
eq_nic_sei->id);
7339
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7340
event_type, desc);
7341
}
7342
7343
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7344
{
7345
/* GAUDI doesn't support any reset except hard-reset */
7346
return -EPERM;
7347
}
7348
7349
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7350
struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7351
{
7352
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7353
int rc = 0;
7354
7355
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7356
CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7357
if (!hbm_ecc_data) {
7358
dev_err(hdev->dev, "No FW ECC data");
7359
return 0;
7360
}
7361
7362
wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7363
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7364
rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7365
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7366
ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7367
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7368
derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7369
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7370
serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7371
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7372
type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7373
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7374
ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7375
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7376
7377
dev_err(hdev->dev,
7378
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7379
device, ch, wr_par, rd_par, ca_par, serr, derr);
7380
dev_err(hdev->dev,
7381
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7382
device, ch, hbm_ecc_data->first_addr, type,
7383
hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7384
hbm_ecc_data->dec_cnt);
7385
return 0;
7386
}
7387
7388
if (hdev->asic_prop.fw_security_enabled) {
7389
dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7390
return 0;
7391
}
7392
7393
base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7394
for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7395
val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7396
val = (val & 0xFF) | ((val >> 8) & 0xFF);
7397
if (val) {
7398
rc = -EIO;
7399
dev_err(hdev->dev,
7400
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7401
device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7402
(val >> 2) & 0x1, (val >> 3) & 0x1,
7403
(val >> 4) & 0x1);
7404
7405
val2 = RREG32(base + ch * 0x1000 + 0x060);
7406
dev_err(hdev->dev,
7407
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7408
device, ch * 2,
7409
RREG32(base + ch * 0x1000 + 0x064),
7410
(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7411
(val2 & 0xFF0000) >> 16,
7412
(val2 & 0xFF000000) >> 24);
7413
}
7414
7415
val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7416
val = (val & 0xFF) | ((val >> 8) & 0xFF);
7417
if (val) {
7418
rc = -EIO;
7419
dev_err(hdev->dev,
7420
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7421
device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7422
(val >> 2) & 0x1, (val >> 3) & 0x1,
7423
(val >> 4) & 0x1);
7424
7425
val2 = RREG32(base + ch * 0x1000 + 0x070);
7426
dev_err(hdev->dev,
7427
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7428
device, ch * 2 + 1,
7429
RREG32(base + ch * 0x1000 + 0x074),
7430
(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7431
(val2 & 0xFF0000) >> 16,
7432
(val2 & 0xFF000000) >> 24);
7433
}
7434
7435
/* Clear interrupts */
7436
RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7437
RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7438
WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7439
WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7440
RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7441
RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7442
}
7443
7444
val = RREG32(base + 0x8F30);
7445
val2 = RREG32(base + 0x8F34);
7446
if (val | val2) {
7447
rc = -EIO;
7448
dev_err(hdev->dev,
7449
"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7450
device, val, val2);
7451
}
7452
val = RREG32(base + 0x8F40);
7453
val2 = RREG32(base + 0x8F44);
7454
if (val | val2) {
7455
rc = -EIO;
7456
dev_err(hdev->dev,
7457
"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7458
device, val, val2);
7459
}
7460
7461
return rc;
7462
}
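/*
* Illustrative decode of the per-channel interrupt value above (the raw
* register content here is made up): after val = (val & 0xFF) | ((val >> 8) & 0xFF),
* a merged value of 0x05 means bit 0 (WR_PAR) and bit 2 (CA_PAR) are set while
* RD_PAR, SERR and DERR (bits 1, 3 and 4) are clear, matching the bit order
* used in the dev_err() format strings.
*/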
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7465
{
7466
switch (hbm_event_type) {
7467
case GAUDI_EVENT_HBM0_SPI_0:
7468
case GAUDI_EVENT_HBM0_SPI_1:
7469
return 0;
7470
case GAUDI_EVENT_HBM1_SPI_0:
7471
case GAUDI_EVENT_HBM1_SPI_1:
7472
return 1;
7473
case GAUDI_EVENT_HBM2_SPI_0:
7474
case GAUDI_EVENT_HBM2_SPI_1:
7475
return 2;
7476
case GAUDI_EVENT_HBM3_SPI_0:
7477
case GAUDI_EVENT_HBM3_SPI_1:
7478
return 3;
7479
default:
7480
break;
7481
}
7482
7483
/* Should never happen */
7484
return 0;
7485
}
7486
7487
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7488
char *interrupt_name)
7489
{
7490
u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7491
bool soft_reset_required = false;
7492
7493
tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7494
TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7495
7496
for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7497
if (tpc_interrupts_cause & BIT(i)) {
7498
dev_err_ratelimited(hdev->dev,
7499
"TPC%d_%s interrupt cause: %s\n",
7500
tpc_id, interrupt_name,
7501
gaudi_tpc_interrupts_cause[i]);
7502
/* If this is a QM error, we need to soft-reset */
7503
if (i == 15)
7504
soft_reset_required = true;
7505
}
7506
7507
/* Clear interrupts */
7508
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7509
7510
return soft_reset_required;
7511
}
7512
7513
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7514
{
7515
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7516
}
7517
7518
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7519
{
7520
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7521
}
7522
7523
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7524
{
7525
ktime_t zero_time = ktime_set(0, 0);
7526
7527
mutex_lock(&hdev->clk_throttling.lock);
7528
7529
switch (event_type) {
7530
case GAUDI_EVENT_FIX_POWER_ENV_S:
7531
hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7532
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7533
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7534
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7535
dev_info_ratelimited(hdev->dev,
7536
"Clock throttling due to power consumption\n");
7537
break;
7538
7539
case GAUDI_EVENT_FIX_POWER_ENV_E:
7540
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7541
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7542
dev_info_ratelimited(hdev->dev,
7543
"Power envelop is safe, back to optimal clock\n");
7544
break;
7545
7546
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7547
hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7548
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7549
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7550
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7551
*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7552
dev_info_ratelimited(hdev->dev,
7553
"Clock throttling due to overheating\n");
7554
break;
7555
7556
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7557
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7558
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7559
*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7560
dev_info_ratelimited(hdev->dev,
7561
"Thermal envelop is safe, back to optimal clock\n");
7562
break;
7563
7564
default:
7565
dev_err(hdev->dev, "Received invalid clock change event %d\n",
7566
event_type);
7567
break;
7568
}
7569
7570
mutex_unlock(&hdev->clk_throttling.lock);
7571
}
7572
7573
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7574
{
7575
struct gaudi_device *gaudi = hdev->asic_specific;
7576
struct hl_info_fw_err_info fw_err_info;
7577
u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7578
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7579
u32 fw_fatal_err_flag = 0, flags = 0;
7580
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7581
>> EQ_CTL_EVENT_TYPE_SHIFT);
7582
bool reset_required, reset_direct = false;
7583
u8 cause;
7584
int rc;
7585
7586
if (event_type >= GAUDI_EVENT_SIZE) {
7587
dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7588
event_type, GAUDI_EVENT_SIZE - 1);
7589
return;
7590
}
7591
7592
gaudi->events_stat[event_type]++;
7593
gaudi->events_stat_aggregate[event_type]++;
7594
7595
switch (event_type) {
7596
case GAUDI_EVENT_PCIE_CORE_DERR:
7597
case GAUDI_EVENT_PCIE_IF_DERR:
7598
case GAUDI_EVENT_PCIE_PHY_DERR:
7599
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7600
case GAUDI_EVENT_MME0_ACC_DERR:
7601
case GAUDI_EVENT_MME0_SBAB_DERR:
7602
case GAUDI_EVENT_MME1_ACC_DERR:
7603
case GAUDI_EVENT_MME1_SBAB_DERR:
7604
case GAUDI_EVENT_MME2_ACC_DERR:
7605
case GAUDI_EVENT_MME2_SBAB_DERR:
7606
case GAUDI_EVENT_MME3_ACC_DERR:
7607
case GAUDI_EVENT_MME3_SBAB_DERR:
7608
case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7609
fallthrough;
7610
case GAUDI_EVENT_CPU_IF_ECC_DERR:
7611
case GAUDI_EVENT_PSOC_MEM_DERR:
7612
case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7613
case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7614
case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7615
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7616
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7617
case GAUDI_EVENT_MMU_DERR:
7618
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7619
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7620
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7621
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7622
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7623
goto reset_device;
7624
7625
case GAUDI_EVENT_GIC500:
7626
case GAUDI_EVENT_AXI_ECC:
7627
case GAUDI_EVENT_L2_RAM_ECC:
7628
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7629
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7630
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7631
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7632
goto reset_device;
7633
7634
case GAUDI_EVENT_HBM0_SPI_0:
7635
case GAUDI_EVENT_HBM1_SPI_0:
7636
case GAUDI_EVENT_HBM2_SPI_0:
7637
case GAUDI_EVENT_HBM3_SPI_0:
7638
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7639
gaudi_hbm_read_interrupts(hdev,
7640
gaudi_hbm_event_to_dev(event_type),
7641
&eq_entry->hbm_ecc_data);
7642
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7643
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7644
goto reset_device;
7645
7646
case GAUDI_EVENT_HBM0_SPI_1:
7647
case GAUDI_EVENT_HBM1_SPI_1:
7648
case GAUDI_EVENT_HBM2_SPI_1:
7649
case GAUDI_EVENT_HBM3_SPI_1:
7650
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7651
gaudi_hbm_read_interrupts(hdev,
7652
gaudi_hbm_event_to_dev(event_type),
7653
&eq_entry->hbm_ecc_data);
7654
hl_fw_unmask_irq(hdev, event_type);
7655
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7656
break;
7657
7658
case GAUDI_EVENT_TPC0_DEC:
7659
case GAUDI_EVENT_TPC1_DEC:
7660
case GAUDI_EVENT_TPC2_DEC:
7661
case GAUDI_EVENT_TPC3_DEC:
7662
case GAUDI_EVENT_TPC4_DEC:
7663
case GAUDI_EVENT_TPC5_DEC:
7664
case GAUDI_EVENT_TPC6_DEC:
7665
case GAUDI_EVENT_TPC7_DEC:
7666
/* On a TPC DEC event, notify on a TPC assertion. While there isn't
* a specific event for assertions yet, the FW generates a TPC DEC event.
* The SW upper layer will inspect an internally mapped area to determine
* whether the event is a TPC assertion or a "real" TPC DEC.
*/
7671
event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7672
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7673
reset_required = gaudi_tpc_read_interrupts(hdev,
7674
tpc_dec_event_to_tpc_id(event_type),
7675
"AXI_SLV_DEC_Error");
7676
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7677
if (reset_required) {
7678
dev_err(hdev->dev, "reset required due to %s\n",
7679
gaudi_irq_map_table[event_type].name);
7680
7681
reset_direct = true;
7682
goto reset_device;
7683
} else {
7684
hl_fw_unmask_irq(hdev, event_type);
7685
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7686
}
7687
break;
7688
7689
case GAUDI_EVENT_TPC0_KRN_ERR:
7690
case GAUDI_EVENT_TPC1_KRN_ERR:
7691
case GAUDI_EVENT_TPC2_KRN_ERR:
7692
case GAUDI_EVENT_TPC3_KRN_ERR:
7693
case GAUDI_EVENT_TPC4_KRN_ERR:
7694
case GAUDI_EVENT_TPC5_KRN_ERR:
7695
case GAUDI_EVENT_TPC6_KRN_ERR:
7696
case GAUDI_EVENT_TPC7_KRN_ERR:
7697
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7698
reset_required = gaudi_tpc_read_interrupts(hdev,
7699
tpc_krn_event_to_tpc_id(event_type),
7700
"KRN_ERR");
7701
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7702
if (reset_required) {
7703
dev_err(hdev->dev, "reset required due to %s\n",
7704
gaudi_irq_map_table[event_type].name);
7705
7706
reset_direct = true;
7707
goto reset_device;
7708
} else {
7709
hl_fw_unmask_irq(hdev, event_type);
7710
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7711
}
7712
break;
7713
7714
case GAUDI_EVENT_PCIE_CORE_SERR:
7715
case GAUDI_EVENT_PCIE_IF_SERR:
7716
case GAUDI_EVENT_PCIE_PHY_SERR:
7717
case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7718
case GAUDI_EVENT_MME0_ACC_SERR:
7719
case GAUDI_EVENT_MME0_SBAB_SERR:
7720
case GAUDI_EVENT_MME1_ACC_SERR:
7721
case GAUDI_EVENT_MME1_SBAB_SERR:
7722
case GAUDI_EVENT_MME2_ACC_SERR:
7723
case GAUDI_EVENT_MME2_SBAB_SERR:
7724
case GAUDI_EVENT_MME3_ACC_SERR:
7725
case GAUDI_EVENT_MME3_SBAB_SERR:
7726
case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7727
case GAUDI_EVENT_CPU_IF_ECC_SERR:
7728
case GAUDI_EVENT_PSOC_MEM_SERR:
7729
case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7730
case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7731
case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7732
case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7733
case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7734
fallthrough;
7735
case GAUDI_EVENT_MMU_SERR:
7736
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7737
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7738
hl_fw_unmask_irq(hdev, event_type);
7739
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7740
break;
7741
7742
case GAUDI_EVENT_PCIE_DEC:
7743
case GAUDI_EVENT_CPU_AXI_SPLITTER:
7744
case GAUDI_EVENT_PSOC_AXI_DEC:
7745
case GAUDI_EVENT_PSOC_PRSTN_FALL:
7746
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7747
hl_fw_unmask_irq(hdev, event_type);
7748
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7749
break;
7750
7751
case GAUDI_EVENT_MMU_PAGE_FAULT:
7752
case GAUDI_EVENT_MMU_WR_PERM:
7753
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7754
hl_fw_unmask_irq(hdev, event_type);
7755
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7756
break;
7757
7758
case GAUDI_EVENT_MME0_WBC_RSP:
7759
case GAUDI_EVENT_MME0_SBAB0_RSP:
7760
case GAUDI_EVENT_MME1_WBC_RSP:
7761
case GAUDI_EVENT_MME1_SBAB0_RSP:
7762
case GAUDI_EVENT_MME2_WBC_RSP:
7763
case GAUDI_EVENT_MME2_SBAB0_RSP:
7764
case GAUDI_EVENT_MME3_WBC_RSP:
7765
case GAUDI_EVENT_MME3_SBAB0_RSP:
7766
case GAUDI_EVENT_RAZWI_OR_ADC:
7767
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7768
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7769
fallthrough;
7770
case GAUDI_EVENT_NIC0_QM0:
7771
case GAUDI_EVENT_NIC0_QM1:
7772
case GAUDI_EVENT_NIC1_QM0:
7773
case GAUDI_EVENT_NIC1_QM1:
7774
case GAUDI_EVENT_NIC2_QM0:
7775
case GAUDI_EVENT_NIC2_QM1:
7776
case GAUDI_EVENT_NIC3_QM0:
7777
case GAUDI_EVENT_NIC3_QM1:
7778
case GAUDI_EVENT_NIC4_QM0:
7779
case GAUDI_EVENT_NIC4_QM1:
7780
case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7781
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7782
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7783
gaudi_handle_qman_err(hdev, event_type, &event_mask);
7784
hl_fw_unmask_irq(hdev, event_type);
7785
event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7786
break;
7787
7788
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7789
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7790
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7791
goto reset_device;
7792
7793
case GAUDI_EVENT_TPC0_BMON_SPMU:
7794
case GAUDI_EVENT_TPC1_BMON_SPMU:
7795
case GAUDI_EVENT_TPC2_BMON_SPMU:
7796
case GAUDI_EVENT_TPC3_BMON_SPMU:
7797
case GAUDI_EVENT_TPC4_BMON_SPMU:
7798
case GAUDI_EVENT_TPC5_BMON_SPMU:
7799
case GAUDI_EVENT_TPC6_BMON_SPMU:
7800
case GAUDI_EVENT_TPC7_BMON_SPMU:
7801
case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7802
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7803
hl_fw_unmask_irq(hdev, event_type);
7804
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7805
break;
7806
7807
case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7808
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7809
hl_fw_unmask_irq(hdev, event_type);
7810
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7811
break;
7812
7813
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7814
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7815
gaudi_print_sm_sei_info(hdev, event_type,
7816
&eq_entry->sm_sei_data);
7817
rc = hl_state_dump(hdev);
7818
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7819
if (rc)
7820
dev_err(hdev->dev,
7821
"Error during system state dump %d\n", rc);
7822
hl_fw_unmask_irq(hdev, event_type);
7823
break;
7824
7825
case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7826
break;
7827
7828
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7829
gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7830
hl_fw_unmask_irq(hdev, event_type);
7831
break;
7832
7833
case GAUDI_EVENT_PSOC_GPIO_U16_0:
7834
cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7835
dev_err(hdev->dev,
7836
"Received high temp H/W interrupt %d (cause %d)\n",
7837
event_type, cause);
7838
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7839
break;
7840
7841
case GAUDI_EVENT_DEV_RESET_REQ:
7842
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7843
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7844
goto reset_device;
7845
7846
case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7847
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7848
gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7849
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7850
goto reset_device;
7851
7852
case GAUDI_EVENT_FW_ALIVE_S:
7853
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7854
gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7855
fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7856
fw_err_info.event_id = event_type;
7857
fw_err_info.event_mask = &event_mask;
7858
hl_handle_fw_err(hdev, &fw_err_info);
7859
goto reset_device;
7860
7861
default:
7862
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7863
event_type);
7864
break;
7865
}
7866
7867
if (event_mask)
7868
hl_notifier_event_send_all(hdev, event_mask);
7869
7870
return;
7871
7872
reset_device:
7873
reset_required = true;
7874
7875
if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7876
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7877
7878
/* notify on device unavailable while the reset is triggered by FW */
7879
event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7880
HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7881
} else if (hdev->hard_reset_on_fw_events) {
7882
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7883
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7884
} else {
7885
reset_required = false;
7886
}
7887
7888
if (reset_required) {
7889
/* escalate general hw errors to critical/fatal error */
7890
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7891
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7892
7893
hl_device_cond_reset(hdev, flags, event_mask);
7894
} else {
7895
hl_fw_unmask_irq(hdev, event_type);
7896
/* A notification on the occurred event needs to be sent even though no reset is executed */
7897
if (event_mask)
7898
hl_notifier_event_send_all(hdev, event_mask);
7899
}
7900
}
7901
7902
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7903
{
7904
struct gaudi_device *gaudi = hdev->asic_specific;
7905
7906
if (aggregate) {
7907
*size = (u32) sizeof(gaudi->events_stat_aggregate);
7908
return gaudi->events_stat_aggregate;
7909
}
7910
7911
*size = (u32) sizeof(gaudi->events_stat);
7912
return gaudi->events_stat;
7913
}
7914
7915
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7916
{
7917
struct gaudi_device *gaudi = hdev->asic_specific;
7918
u32 status, timeout_usec;
7919
int rc;
7920
7921
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7922
hdev->reset_info.hard_reset_pending)
7923
return 0;
7924
7925
if (hdev->pldm)
7926
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7927
else
7928
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7929
7930
/* L0 & L1 invalidation */
7931
WREG32(mmSTLB_INV_PS, 3);
7932
WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7933
WREG32(mmSTLB_INV_PS, 2);
7934
7935
rc = hl_poll_timeout(
7936
hdev,
7937
mmSTLB_INV_PS,
7938
status,
7939
!status,
7940
1000,
7941
timeout_usec);
7942
7943
WREG32(mmSTLB_INV_SET, 0);
7944
7945
return rc;
7946
}
7947
7948
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7949
bool is_hard, u32 flags,
7950
u32 asid, u64 va, u64 size)
7951
{
7952
/* Treat as invalidate all because there is no range invalidation
7953
* in Gaudi
7954
*/
7955
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7956
}
7957
7958
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7959
{
7960
u32 status, timeout_usec;
7961
int rc;
7962
7963
if (hdev->pldm)
7964
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7965
else
7966
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7967
7968
WREG32(MMU_ASID, asid);
7969
WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7970
WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7971
WREG32(MMU_BUSY, 0x80000000);
7972
7973
rc = hl_poll_timeout(
7974
hdev,
7975
MMU_BUSY,
7976
status,
7977
!(status & 0x80000000),
7978
1000,
7979
timeout_usec);
7980
7981
if (rc) {
7982
dev_err(hdev->dev,
7983
"Timeout during MMU hop0 config of asid %d\n", asid);
7984
return rc;
7985
}
7986
7987
return 0;
7988
}
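/*
* Illustrative split of the hop0 physical address (assuming the shift macros
* match their names, i.e. 12 and 44): for a hop0 table at 0x345678000 the
* driver would program
*
*   MMU_HOP0_PA43_12 = 0x345678000 >> 12 = 0x345678
*   MMU_HOP0_PA49_44 = 0x345678000 >> 44 = 0x0
*/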
static int gaudi_send_heartbeat(struct hl_device *hdev)
7991
{
7992
struct gaudi_device *gaudi = hdev->asic_specific;
7993
7994
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7995
return 0;
7996
7997
return hl_fw_send_heartbeat(hdev);
7998
}
7999
8000
static int gaudi_cpucp_info_get(struct hl_device *hdev)
8001
{
8002
struct gaudi_device *gaudi = hdev->asic_specific;
8003
struct asic_fixed_properties *prop = &hdev->asic_prop;
8004
int rc;
8005
8006
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8007
return 0;
8008
8009
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8010
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8011
mmCPU_BOOT_ERR1);
8012
if (rc)
8013
return rc;
8014
8015
if (!strlen(prop->cpucp_info.card_name))
8016
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8017
CARD_NAME_MAX_LEN);
8018
8019
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8020
8021
set_default_power_values(hdev);
8022
8023
return 0;
8024
}
8025
8026
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8027
struct engines_data *e)
8028
{
8029
struct gaudi_device *gaudi = hdev->asic_specific;
8030
const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8031
const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8032
const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8033
unsigned long *mask = (unsigned long *)mask_arr;
8034
u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8035
bool is_idle = true, is_eng_idle, is_slave;
8036
u64 offset;
8037
int i, dma_id, port;
8038
8039
if (e)
8040
hl_engine_data_sprintf(e,
8041
"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8042
"--- ------- ------------ ---------- -------------\n");
8043
8044
for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8045
dma_id = gaudi_dma_assignment[i];
8046
offset = dma_id * DMA_QMAN_OFFSET;
8047
8048
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8049
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8050
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8051
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8052
IS_DMA_IDLE(dma_core_sts0);
8053
is_idle &= is_eng_idle;
8054
8055
if (mask && !is_eng_idle)
8056
set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8057
if (e)
8058
hl_engine_data_sprintf(e, fmt, dma_id,
8059
is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8060
qm_cgm_sts, dma_core_sts0);
8061
}
8062
8063
if (e)
8064
hl_engine_data_sprintf(e,
8065
"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8066
"--- ------- ------------ ---------- ----------\n");
8067
8068
for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8069
offset = i * TPC_QMAN_OFFSET;
8070
qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8071
qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8072
tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8073
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8074
IS_TPC_IDLE(tpc_cfg_sts);
8075
is_idle &= is_eng_idle;
8076
8077
if (mask && !is_eng_idle)
8078
set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8079
if (e)
8080
hl_engine_data_sprintf(e, fmt, i,
8081
is_eng_idle ? "Y" : "N",
8082
qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8083
}
8084
8085
if (e)
8086
hl_engine_data_sprintf(e,
8087
"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8088
"--- ------- ------------ ---------- -----------\n");
8089
8090
for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8091
offset = i * MME_QMAN_OFFSET;
8092
mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8093
is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8094
8095
/* MME 1 & 3 are slaves, no need to check their QMANs */
8096
is_slave = i % 2;
8097
if (!is_slave) {
8098
qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8099
qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8100
is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8101
}
8102
8103
is_idle &= is_eng_idle;
8104
8105
if (mask && !is_eng_idle)
8106
set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8107
if (e) {
8108
if (!is_slave)
8109
hl_engine_data_sprintf(e, fmt, i,
8110
is_eng_idle ? "Y" : "N",
8111
qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8112
else
8113
hl_engine_data_sprintf(e, mme_slave_fmt, i,
8114
is_eng_idle ? "Y" : "N", "-",
8115
"-", mme_arch_sts);
8116
}
8117
}
8118
8119
if (e)
8120
hl_engine_data_sprintf(e,
8121
"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8122
"--- ------- ------------ ----------\n");
8123
8124
for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8125
offset = i * NIC_MACRO_QMAN_OFFSET;
8126
port = 2 * i;
8127
if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8128
qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8129
qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8130
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8131
is_idle &= is_eng_idle;
8132
8133
if (mask && !is_eng_idle)
8134
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8135
if (e)
8136
hl_engine_data_sprintf(e, nic_fmt, port,
8137
is_eng_idle ? "Y" : "N",
8138
qm_glbl_sts0, qm_cgm_sts);
8139
}
8140
8141
port = 2 * i + 1;
8142
if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8143
qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8144
qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8145
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8146
is_idle &= is_eng_idle;
8147
8148
if (mask && !is_eng_idle)
8149
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8150
if (e)
8151
hl_engine_data_sprintf(e, nic_fmt, port,
8152
is_eng_idle ? "Y" : "N",
8153
qm_glbl_sts0, qm_cgm_sts);
8154
}
8155
}
8156
8157
if (e)
8158
hl_engine_data_sprintf(e, "\n");
8159
8160
return is_idle;
8161
}
8162
8163
static void gaudi_hw_queues_lock(struct hl_device *hdev)
8164
__acquires(&gaudi->hw_queues_lock)
8165
{
8166
struct gaudi_device *gaudi = hdev->asic_specific;
8167
8168
spin_lock(&gaudi->hw_queues_lock);
8169
}
8170
8171
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8172
__releases(&gaudi->hw_queues_lock)
8173
{
8174
struct gaudi_device *gaudi = hdev->asic_specific;
8175
8176
spin_unlock(&gaudi->hw_queues_lock);
8177
}
8178
8179
static u32 gaudi_get_pci_id(struct hl_device *hdev)
8180
{
8181
return hdev->pdev->device;
8182
}
8183
8184
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8185
size_t max_size)
8186
{
8187
struct gaudi_device *gaudi = hdev->asic_specific;
8188
8189
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8190
return 0;
8191
8192
return hl_fw_get_eeprom_data(hdev, data, max_size);
8193
}
8194
8195
static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8196
{
8197
struct gaudi_device *gaudi = hdev->asic_specific;
8198
8199
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8200
return 0;
8201
8202
return hl_fw_get_monitor_dump(hdev, data);
8203
}
8204
8205
/*
8206
* this function should be used only during initialization and/or after reset,
8207
* when there are no active users.
8208
*/
8209
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8210
{
8211
u64 kernel_timeout;
8212
u32 status, offset;
8213
int rc;
8214
8215
offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8216
8217
if (hdev->pldm)
8218
kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8219
else
8220
kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8221
8222
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8223
lower_32_bits(tpc_kernel));
8224
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8225
upper_32_bits(tpc_kernel));
8226
8227
WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8228
lower_32_bits(tpc_kernel));
8229
WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8230
upper_32_bits(tpc_kernel));
8231
/* set a valid LUT pointer, content is of no significance */
8232
WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8233
lower_32_bits(tpc_kernel));
8234
WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8235
upper_32_bits(tpc_kernel));
8236
8237
WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8238
lower_32_bits(CFG_BASE +
8239
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8240
8241
WREG32(mmTPC0_CFG_TPC_CMD + offset,
8242
(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8243
1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8244
/* wait a bit for the engine to start executing */
8245
usleep_range(1000, 1500);
8246
8247
/* wait until engine has finished executing */
8248
rc = hl_poll_timeout(
8249
hdev,
8250
mmTPC0_CFG_STATUS + offset,
8251
status,
8252
(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8253
TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8254
1000,
8255
kernel_timeout);
8256
8257
if (rc) {
8258
dev_err(hdev->dev,
8259
"Timeout while waiting for TPC%d icache prefetch\n",
8260
tpc_id);
8261
return -EIO;
8262
}
8263
8264
WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8265
1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8266
8267
/* wait a bit for the engine to start executing */
8268
usleep_range(1000, 1500);
8269
8270
/* wait until engine has finished executing */
8271
rc = hl_poll_timeout(
8272
hdev,
8273
mmTPC0_CFG_STATUS + offset,
8274
status,
8275
(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8276
TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8277
1000,
8278
kernel_timeout);
8279
8280
if (rc) {
8281
dev_err(hdev->dev,
8282
"Timeout while waiting for TPC%d vector pipe\n",
8283
tpc_id);
8284
return -EIO;
8285
}
8286
8287
rc = hl_poll_timeout(
8288
hdev,
8289
mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8290
status,
8291
(status == 0),
8292
1000,
8293
kernel_timeout);
8294
8295
if (rc) {
8296
dev_err(hdev->dev,
8297
"Timeout while waiting for TPC%d kernel to execute\n",
8298
tpc_id);
8299
return -EIO;
8300
}
8301
8302
return 0;
8303
}
8304
8305
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8306
struct hl_ctx *ctx)
8307
{
8308
struct gaudi_device *gaudi = hdev->asic_specific;
8309
int min_alloc_order, rc, collective_cb_size;
8310
8311
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8312
return 0;
8313
8314
hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8315
HOST_SPACE_INTERNAL_CB_SZ,
8316
&hdev->internal_cb_pool_dma_addr,
8317
GFP_KERNEL | __GFP_ZERO);
8318
8319
if (!hdev->internal_cb_pool_virt_addr)
8320
return -ENOMEM;
8321
8322
collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8323
sizeof(struct packet_fence);
8324
min_alloc_order = ilog2(collective_cb_size);
8325
8326
hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8327
if (!hdev->internal_cb_pool) {
8328
dev_err(hdev->dev,
8329
"Failed to create internal CB pool\n");
8330
rc = -ENOMEM;
8331
goto free_internal_cb_pool;
8332
}
8333
8334
rc = gen_pool_add(hdev->internal_cb_pool,
8335
(uintptr_t) hdev->internal_cb_pool_virt_addr,
8336
HOST_SPACE_INTERNAL_CB_SZ, -1);
8337
if (rc) {
8338
dev_err(hdev->dev,
8339
"Failed to add memory to internal CB pool\n");
8340
rc = -EFAULT;
8341
goto destroy_internal_cb_pool;
8342
}
8343
8344
hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8345
HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8346
HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8347
8348
if (!hdev->internal_cb_va_base) {
8349
rc = -ENOMEM;
8350
goto destroy_internal_cb_pool;
8351
}
8352
8353
mutex_lock(&hdev->mmu_lock);
8354
8355
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8356
hdev->internal_cb_pool_dma_addr,
8357
HOST_SPACE_INTERNAL_CB_SZ);
8358
if (rc)
8359
goto unreserve_internal_cb_pool;
8360
8361
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8362
if (rc)
8363
goto unmap_internal_cb_pool;
8364
8365
mutex_unlock(&hdev->mmu_lock);
8366
8367
return 0;
8368
8369
unmap_internal_cb_pool:
8370
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8371
HOST_SPACE_INTERNAL_CB_SZ);
8372
unreserve_internal_cb_pool:
8373
mutex_unlock(&hdev->mmu_lock);
8374
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8375
HOST_SPACE_INTERNAL_CB_SZ);
8376
destroy_internal_cb_pool:
8377
gen_pool_destroy(hdev->internal_cb_pool);
8378
free_internal_cb_pool:
8379
hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8380
hdev->internal_cb_pool_dma_addr);
8381
8382
return rc;
8383
}
8384
8385
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8386
struct hl_ctx *ctx)
8387
{
8388
struct gaudi_device *gaudi = hdev->asic_specific;
8389
8390
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8391
return;
8392
8393
mutex_lock(&hdev->mmu_lock);
8394
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8395
HOST_SPACE_INTERNAL_CB_SZ);
8396
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8397
HOST_SPACE_INTERNAL_CB_SZ);
8398
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8399
mutex_unlock(&hdev->mmu_lock);
8400
8401
gen_pool_destroy(hdev->internal_cb_pool);
8402
8403
hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8404
hdev->internal_cb_pool_dma_addr);
8405
}
8406
8407
static int gaudi_ctx_init(struct hl_ctx *ctx)
8408
{
8409
int rc;
8410
8411
if (ctx->asid == HL_KERNEL_ASID_ID)
8412
return 0;
8413
8414
rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8415
if (rc)
8416
return rc;
8417
8418
rc = gaudi_restore_user_registers(ctx->hdev);
8419
if (rc)
8420
gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8421
8422
return rc;
8423
}
8424
8425
static void gaudi_ctx_fini(struct hl_ctx *ctx)
8426
{
8427
if (ctx->asid == HL_KERNEL_ASID_ID)
8428
return;
8429
8430
gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8431
}
8432
8433
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8434
{
8435
return 0;
8436
}
8437
8438
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8439
{
8440
return gaudi_cq_assignment[cq_idx];
8441
}
8442
8443
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

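/*
 * The sizes above cover the packets emitted by gaudi_gen_signal_cb() and
 * gaudi_gen_wait_cb() below (one MSG_SHORT for the signal; four MSG_SHORT
 * plus a FENCE for the wait). The two extra MSG_PROT packets counted here are
 * presumably room for the completion packets the common code appends at the
 * end of the CB, not something generated in this file.
 */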
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}

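/*
 * Append a single MSG_SHORT packet at the given offset inside the CB. The
 * packet targets the W_S sync manager SOB base and uses ADD mode with a value
 * of 1, i.e. it increments the given sync object by one when it executes.
 * Returns the updated CB size.
 */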
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

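/*
 * Build the MSG_SHORT that arms the monitor: the sync group is sob_base / 8
 * (eight sync objects per group), the 8-bit mask selects which SOBs in the
 * group participate, and mode 0 makes the monitor fire when the selected
 * objects reach a value greater than or equal to sob_val. Returns the packet
 * size, or 0 if the sob_base/sob_mask combination is invalid.
 */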
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

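/*
 * Build the FENCE packet that actually blocks the queue: it is configured
 * with DEC_VAL = 1, TARGET_VAL = 1 and fence ID 2, so the CP stalls until the
 * fence counter reaches 1 and then decrements it back to 0. The value is
 * presumably supplied by the armed monitor's payload write to the queue's
 * CP_FENCE2_RDATA register (see gaudi_get_fence_addr() below).
 */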
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

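/*
 * Translate a queue ID into the CFG-space address of that queue's
 * CP_FENCE2_RDATA register, which is used as the monitor payload target for
 * wait CBs. For NIC queues the address is derived from the macro/engine QMAN
 * offsets; queue IDs not handled here are rejected with -EINVAL.
 */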
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

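/*
 * Emit the three monitor setup MSG_SHORT packets: the low and high 32 bits of
 * the payload (fence) address, followed by the payload data itself, which is
 * a constant 1. Returns the total size of the packets that were added.
 */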
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

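/*
 * Assemble a complete wait CB starting at prop->size: three monitor setup
 * packets (payload address and data), one monitor arm packet and one fence
 * packet. Returns the new CB size, or 0 if the queue has no fence register.
 */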
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

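/*
 * Compose the 64-bit device timestamp from the PSOC timestamp counter's high
 * and low registers. Note: the two halves are read back to back without
 * re-reading the high word, so a low-word rollover between the two reads
 * could, in principle, yield a slightly skewed value.
 */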
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

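/*
 * Translate the PLL index exposed to user-space (HL_GAUDI_*_PLL) into the
 * index the firmware expects; unknown indices are rejected with -EINVAL.
 */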
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

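/*
 * Build the sync-object-to-engine map used by the state dump: read the sync
 * object configuration register of every TPC, MME (including its sub-engines)
 * and DMA engine, and hash each value to the owning engine. On allocation
 * failure the partially built map is freed and the error is returned.
 */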
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

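/*
 * Render the list of sync objects a monitor is armed on into a human readable
 * string. Cleared bits in the arm mask select SOBs within the group, so each
 * cleared bit i maps to SOB (group_id * MONITOR_MAX_SOBS + i). The loop stops
 * early if the next write could overflow the destination buffer.
 */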
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

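/*
 * Dump the fence state of a single engine: snapshot the per-stream CP status
 * and fence counter registers, then print one line for every stream that has
 * a fence in progress, including the raw counter/RDATA register addresses.
 */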
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}