Path: blob/master/drivers/accel/habanalabs/gaudi/gaudi.c
29286 views
// SPDX-License-Identifier: GPL-2.012/*3* Copyright 2016-2022 HabanaLabs, Ltd.4* All Rights Reserved.5*/67#include "gaudiP.h"8#include "../include/hw_ip/mmu/mmu_general.h"9#include "../include/hw_ip/mmu/mmu_v1_1.h"10#include "../include/gaudi/gaudi_masks.h"11#include "../include/gaudi/gaudi_fw_if.h"12#include "../include/gaudi/gaudi_reg_map.h"13#include "../include/gaudi/gaudi_async_ids_map_extended.h"1415#include <linux/module.h>16#include <linux/pci.h>17#include <linux/firmware.h>18#include <linux/hwmon.h>19#include <linux/iommu.h>20#include <linux/seq_file.h>2122/*23* Gaudi security scheme:24*25* 1. Host is protected by:26* - Range registers27* - MMU28*29* 2. DDR is protected by:30* - Range registers (protect the first 512MB)31*32* 3. Configuration is protected by:33* - Range registers34* - Protection bits35*36* MMU is always enabled.37*38* QMAN DMA channels 0,1 (PCI DMAN):39* - DMA is not secured.40* - PQ and CQ are secured.41* - CP is secured: The driver needs to parse CB but WREG should be allowed42* because of TDMA (tensor DMA). Hence, WREG is always not43* secured.44*45* When the driver needs to use DMA it will check that Gaudi is idle, set DMA46* channel 0 to be secured, execute the DMA and change it back to not secured.47* Currently, the driver doesn't use the DMA while there are compute jobs48* running.49*50* The current use cases for the driver to use the DMA are:51* - Clear SRAM on context switch (happens on context switch when device is52* idle)53* - MMU page tables area clear (happens on init)54*55* QMAN DMA 2-7, TPC, MME, NIC:56* PQ is secured and is located on the Host (HBM CON TPC3 bug)57* CQ, CP and the engine are not secured58*59*/6061#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"62#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"63#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"6465MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);66MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);67MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);6869#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */7071#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */72#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */73#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */74#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */7576#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */77#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */78#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */83#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */84#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */8586#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB98788#define GAUDI_MAX_STRING_LEN 208990#define GAUDI_CB_POOL_CB_CNT 51291#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */9293#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 39495#define GAUDI_NUM_OF_TPC_INTR_CAUSE 209697#define GAUDI_NUM_OF_QM_ERR_CAUSE 169899#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3100101#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */102103#define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */104105#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")106107#define MONITOR_SOB_STRING_SIZE 256108109static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {110GAUDI_QUEUE_ID_DMA_0_0,111GAUDI_QUEUE_ID_DMA_0_1,112GAUDI_QUEUE_ID_DMA_0_2,113GAUDI_QUEUE_ID_DMA_0_3,114GAUDI_QUEUE_ID_DMA_1_0,115GAUDI_QUEUE_ID_DMA_1_1,116GAUDI_QUEUE_ID_DMA_1_2,117GAUDI_QUEUE_ID_DMA_1_3118};119120static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {121[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,122[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,123[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,124[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,125[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,126[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,127[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,128[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7129};130131static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {132[0] = GAUDI_QUEUE_ID_DMA_0_0,133[1] = GAUDI_QUEUE_ID_DMA_0_1,134[2] = GAUDI_QUEUE_ID_DMA_0_2,135[3] = GAUDI_QUEUE_ID_DMA_0_3,136[4] = GAUDI_QUEUE_ID_DMA_1_0,137[5] = GAUDI_QUEUE_ID_DMA_1_1,138[6] = GAUDI_QUEUE_ID_DMA_1_2,139[7] = GAUDI_QUEUE_ID_DMA_1_3,140};141142static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {143[PACKET_WREG_32] = sizeof(struct packet_wreg32),144[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),145[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),146[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),147[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),148[PACKET_REPEAT] = sizeof(struct packet_repeat),149[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),150[PACKET_FENCE] = sizeof(struct packet_fence),151[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),152[PACKET_NOP] = sizeof(struct packet_nop),153[PACKET_STOP] = sizeof(struct packet_stop),154[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),155[PACKET_WAIT] = sizeof(struct packet_wait),156[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)157};158159static inline bool validate_packet_id(enum packet_id id)160{161switch (id) {162case PACKET_WREG_32:163case PACKET_WREG_BULK:164case PACKET_MSG_LONG:165case PACKET_MSG_SHORT:166case PACKET_CP_DMA:167case PACKET_REPEAT:168case PACKET_MSG_PROT:169case PACKET_FENCE:170case PACKET_LIN_DMA:171case PACKET_NOP:172case PACKET_STOP:173case PACKET_ARB_POINT:174case PACKET_WAIT:175case PACKET_LOAD_AND_EXE:176return true;177default:178return false;179}180}181182static const char * const183gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {184"tpc_address_exceed_slm",185"tpc_div_by_0",186"tpc_spu_mac_overflow",187"tpc_spu_addsub_overflow",188"tpc_spu_abs_overflow",189"tpc_spu_fp_dst_nan_inf",190"tpc_spu_fp_dst_denorm",191"tpc_vpu_mac_overflow",192"tpc_vpu_addsub_overflow",193"tpc_vpu_abs_overflow",194"tpc_vpu_fp_dst_nan_inf",195"tpc_vpu_fp_dst_denorm",196"tpc_assertions",197"tpc_illegal_instruction",198"tpc_pc_wrap_around",199"tpc_qm_sw_err",200"tpc_hbw_rresp_err",201"tpc_hbw_bresp_err",202"tpc_lbw_rresp_err",203"tpc_lbw_bresp_err"204};205206static const char * const207gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {208"PQ AXI HBW error",209"CQ AXI HBW error",210"CP AXI HBW error",211"CP error due to undefined OPCODE",212"CP encountered STOP OPCODE",213"CP AXI LBW error",214"CP WRREG32 or WRBULK returned error",215"N/A",216"FENCE 0 inc over max value and clipped",217"FENCE 1 inc over max value and clipped",218"FENCE 2 inc over max value and clipped",219"FENCE 3 inc over max value and clipped",220"FENCE 0 dec under min value and clipped",221"FENCE 1 dec under min value and clipped",222"FENCE 2 dec under min value and clipped",223"FENCE 3 dec under min value and clipped"224};225226static const char * const227gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {228"Choice push while full error",229"Choice Q watchdog error",230"MSG AXI LBW returned with error"231};232233static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {234QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */235QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */236QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */237QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */238QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */239QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */240QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */241QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */242QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */243QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */244QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */245QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */246QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */247QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */248QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */249QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */250QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */251QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */252QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */253QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */254QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */255QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */256QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */257QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */258QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */259QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */260QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */261QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */262QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */263QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */264QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */265QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */266QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */267QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */268QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */269QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */270QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */271QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */272QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */273QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */274QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */275QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */276QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */277QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */278QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */279QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */280QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */281QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */282QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */283QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */284QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */285QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */286QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */287QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */288QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */289QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */290QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */291QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */292QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */293QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */294QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */295QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */296QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */297QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */298QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */299QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */300QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */301QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */302QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */303QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */304QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */305QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */306QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */307QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */308QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */309QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */310QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */311QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */312QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */313QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */314QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */315QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */316QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */317QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */318QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */319QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */320QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */321QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */322QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */323QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */324QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */325QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */326QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */327QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */328QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */329QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */330QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */331QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */332QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */333QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */334QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */335QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */336QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */337QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */338QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */339QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */340QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */341QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */342QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */343QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */344QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */345QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */346QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */347};348349static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {350{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },351{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },352{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },353{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },354{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },355{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },356{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },357{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },358{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },359{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },360{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },361{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },362{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },363{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },364{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },365{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },366{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },367{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },368{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },369{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },370{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },371{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },372{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },373{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },374{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },375{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },376{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },377};378379static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {380{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },381{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },382{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },383{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },384{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },385{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },386{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },387{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },388{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },389{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },390{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },391};392393static s64 gaudi_state_dump_specs_props[] = {394[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,395[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,396[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,397[SP_MON_OBJ_WR_ADDR_LOW] =398mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,399[SP_MON_OBJ_WR_ADDR_HIGH] =400mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,401[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,402[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,403[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,404[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,405[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,406[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,407[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,408[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,409[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,410[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,411[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,412[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,413[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,414[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,415[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,416[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,417[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,418[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,419[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,420[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,421[SP_FENCE0_CNT_OFFSET] =422mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,423[SP_FENCE0_RDATA_OFFSET] =424mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,425[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,426[SP_NUM_CORES] = 1,427};428429static const int gaudi_queue_id_to_engine_id[] = {430[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,431[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,432[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,433[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,434[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,435[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,436[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,437[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,438[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,439[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,440[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,441[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,442[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,443[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,444[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,445[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,446[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,447[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,448[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,449[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,450[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,451[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,452[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,453[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,454[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,455[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,456[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,457[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,458[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,459};460461/* The order here is opposite to the order of the indexing in the h/w.462* i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.463*/464static const char * const gaudi_sync_manager_names[] = {465"SYNC_MGR_E_N",466"SYNC_MGR_W_N",467"SYNC_MGR_E_S",468"SYNC_MGR_W_S",469NULL470};471472struct ecc_info_extract_params {473u64 block_address;474u32 num_memories;475bool derr;476};477478static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,479u64 phys_addr);480static int gaudi_send_job_on_qman0(struct hl_device *hdev,481struct hl_cs_job *job);482static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,483u32 size, u64 val);484static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,485u32 num_regs, u32 val);486static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,487u32 tpc_id);488static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);489static int gaudi_cpucp_info_get(struct hl_device *hdev);490static void gaudi_disable_clock_gating(struct hl_device *hdev);491static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);492static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,493u32 size, bool eb);494static u32 gaudi_gen_wait_cb(struct hl_device *hdev,495struct hl_gen_wait_properties *prop);496static inline enum hl_collective_mode497get_collective_mode(struct hl_device *hdev, u32 queue_id)498{499if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)500return HL_COLLECTIVE_MASTER;501502if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&503queue_id <= GAUDI_QUEUE_ID_DMA_5_3)504return HL_COLLECTIVE_SLAVE;505506if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&507queue_id <= GAUDI_QUEUE_ID_TPC_7_3)508return HL_COLLECTIVE_SLAVE;509510if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&511queue_id <= GAUDI_QUEUE_ID_NIC_9_3)512return HL_COLLECTIVE_SLAVE;513514return HL_COLLECTIVE_NOT_SUPPORTED;515}516517static inline void set_default_power_values(struct hl_device *hdev)518{519struct asic_fixed_properties *prop = &hdev->asic_prop;520521if (hdev->card_type == cpucp_card_type_pmc) {522prop->max_power_default = MAX_POWER_DEFAULT_PMC;523524if (prop->fw_security_enabled)525prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;526else527prop->dc_power_default = DC_POWER_DEFAULT_PMC;528} else {529prop->max_power_default = MAX_POWER_DEFAULT_PCI;530prop->dc_power_default = DC_POWER_DEFAULT_PCI;531}532}533534static int gaudi_set_fixed_properties(struct hl_device *hdev)535{536struct asic_fixed_properties *prop = &hdev->asic_prop;537u32 num_sync_stream_queues = 0;538int i;539540prop->max_queues = GAUDI_QUEUE_ID_SIZE;541prop->hw_queues_props = kcalloc(prop->max_queues,542sizeof(struct hw_queue_properties),543GFP_KERNEL);544545if (!prop->hw_queues_props)546return -ENOMEM;547548for (i = 0 ; i < prop->max_queues ; i++) {549if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {550prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;551prop->hw_queues_props[i].driver_only = 0;552prop->hw_queues_props[i].supports_sync_stream = 1;553prop->hw_queues_props[i].cb_alloc_flags =554CB_ALLOC_KERNEL;555num_sync_stream_queues++;556} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {557prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;558prop->hw_queues_props[i].driver_only = 1;559prop->hw_queues_props[i].supports_sync_stream = 0;560prop->hw_queues_props[i].cb_alloc_flags =561CB_ALLOC_KERNEL;562} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {563prop->hw_queues_props[i].type = QUEUE_TYPE_INT;564prop->hw_queues_props[i].driver_only = 0;565prop->hw_queues_props[i].supports_sync_stream = 0;566prop->hw_queues_props[i].cb_alloc_flags =567CB_ALLOC_USER;568569}570prop->hw_queues_props[i].collective_mode =571get_collective_mode(hdev, i);572}573574prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;575prop->cfg_base_address = CFG_BASE;576prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;577prop->host_base_address = HOST_PHYS_BASE;578prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;579prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;580prop->completion_mode = HL_COMPLETION_MODE_JOB;581prop->collective_first_sob = 0;582prop->collective_first_mon = 0;583584/* 2 SOBs per internal queue stream are reserved for collective */585prop->sync_stream_first_sob =586ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)587* QMAN_STREAMS * HL_RSVD_SOBS;588589/* 1 monitor per internal queue stream are reserved for collective590* 2 monitors per external queue stream are reserved for collective591*/592prop->sync_stream_first_mon =593(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +594(NUMBER_OF_EXT_HW_QUEUES * 2);595596prop->dram_base_address = DRAM_PHYS_BASE;597prop->dram_size = GAUDI_HBM_SIZE_32GB;598prop->dram_end_address = prop->dram_base_address + prop->dram_size;599prop->dram_user_base_address = DRAM_BASE_ADDR_USER;600601prop->sram_base_address = SRAM_BASE_ADDR;602prop->sram_size = SRAM_SIZE;603prop->sram_end_address = prop->sram_base_address + prop->sram_size;604prop->sram_user_base_address =605prop->sram_base_address + SRAM_USER_BASE_OFFSET;606607prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;608prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;609610prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;611if (hdev->pldm)612prop->mmu_pgt_size = 0x800000; /* 8MB */613else614prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;615prop->mmu_pte_size = HL_PTE_SIZE;616prop->dram_page_size = PAGE_SIZE_2MB;617prop->device_mem_alloc_default_page_size = prop->dram_page_size;618prop->dram_supports_virtual_memory = false;619620prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;621prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;622prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;623prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;624prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;625prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;626prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;627prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;628prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;629prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;630prop->pmmu.start_addr = VA_HOST_SPACE_START;631prop->pmmu.end_addr =632(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;633prop->pmmu.page_size = PAGE_SIZE_4KB;634prop->pmmu.num_hops = MMU_ARCH_5_HOPS;635prop->pmmu.last_mask = LAST_MASK;636/* TODO: will be duplicated until implementing per-MMU props */637prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;638prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;639640/* PMMU and HPMMU are the same except of page size */641memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));642prop->pmmu_huge.page_size = PAGE_SIZE_2MB;643644/* shifts and masks are the same in PMMU and DMMU */645memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));646prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);647prop->dmmu.end_addr = VA_HOST_SPACE_END;648prop->dmmu.page_size = PAGE_SIZE_2MB;649prop->dmmu.pgt_size = prop->mmu_pgt_size;650651prop->cfg_size = CFG_SIZE;652prop->max_asid = MAX_ASID;653prop->num_of_events = GAUDI_EVENT_SIZE;654prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;655prop->tpc_enabled_mask = TPC_ENABLED_MASK;656657set_default_power_values(hdev);658659prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;660prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;661662prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;663prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;664665strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,666CARD_NAME_MAX_LEN);667668prop->max_pending_cs = GAUDI_MAX_PENDING_CS;669670prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =671prop->sync_stream_first_sob +672(num_sync_stream_queues * HL_RSVD_SOBS);673prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =674prop->sync_stream_first_mon +675(num_sync_stream_queues * HL_RSVD_MONS);676677prop->first_available_user_interrupt = USHRT_MAX;678prop->tpc_interrupt_id = USHRT_MAX;679680/* single msi */681prop->eq_interrupt_id = 0;682683for (i = 0 ; i < HL_MAX_DCORES ; i++)684prop->first_available_cq[i] = USHRT_MAX;685686prop->fw_cpu_boot_dev_sts0_valid = false;687prop->fw_cpu_boot_dev_sts1_valid = false;688prop->hard_reset_done_by_fw = false;689prop->gic_interrupts_enable = true;690691prop->server_type = HL_SERVER_TYPE_UNKNOWN;692693prop->clk_pll_index = HL_GAUDI_MME_PLL;694prop->max_freq_value = GAUDI_MAX_CLK_FREQ;695696prop->use_get_power_for_reset_history = true;697698prop->configurable_stop_on_err = true;699700prop->set_max_power_on_device_init = true;701702prop->dma_mask = 48;703704prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;705706return 0;707}708709static int gaudi_pci_bars_map(struct hl_device *hdev)710{711static const char * const name[] = {"SRAM", "CFG", "HBM"};712bool is_wc[3] = {false, false, true};713int rc;714715rc = hl_pci_bars_map(hdev, name, is_wc);716if (rc)717return rc;718719hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +720(CFG_BASE - SPI_FLASH_BASE_ADDR);721722return 0;723}724725static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)726{727struct gaudi_device *gaudi = hdev->asic_specific;728struct hl_inbound_pci_region pci_region;729u64 old_addr = addr;730int rc;731732if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))733return old_addr;734735if (hdev->asic_prop.iatu_done_by_fw)736return U64_MAX;737738/* Inbound Region 2 - Bar 4 - Point to HBM */739pci_region.mode = PCI_BAR_MATCH_MODE;740pci_region.bar = HBM_BAR_ID;741pci_region.addr = addr;742rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);743if (rc)744return U64_MAX;745746if (gaudi) {747old_addr = gaudi->hbm_bar_cur_addr;748gaudi->hbm_bar_cur_addr = addr;749}750751return old_addr;752}753754static int gaudi_init_iatu(struct hl_device *hdev)755{756struct hl_inbound_pci_region inbound_region;757struct hl_outbound_pci_region outbound_region;758int rc;759760if (hdev->asic_prop.iatu_done_by_fw)761return 0;762763/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */764inbound_region.mode = PCI_BAR_MATCH_MODE;765inbound_region.bar = SRAM_BAR_ID;766inbound_region.addr = SRAM_BASE_ADDR;767rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);768if (rc)769goto done;770771/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */772inbound_region.mode = PCI_BAR_MATCH_MODE;773inbound_region.bar = CFG_BAR_ID;774inbound_region.addr = SPI_FLASH_BASE_ADDR;775rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);776if (rc)777goto done;778779/* Inbound Region 2 - Bar 4 - Point to HBM */780inbound_region.mode = PCI_BAR_MATCH_MODE;781inbound_region.bar = HBM_BAR_ID;782inbound_region.addr = DRAM_PHYS_BASE;783rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);784if (rc)785goto done;786787/* Outbound Region 0 - Point to Host */788outbound_region.addr = HOST_PHYS_BASE;789outbound_region.size = HOST_PHYS_SIZE;790rc = hl_pci_set_outbound_region(hdev, &outbound_region);791792done:793return rc;794}795796static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)797{798return RREG32(mmHW_STATE);799}800801static int gaudi_early_init(struct hl_device *hdev)802{803struct asic_fixed_properties *prop = &hdev->asic_prop;804struct pci_dev *pdev = hdev->pdev;805resource_size_t pci_bar_size;806u32 fw_boot_status;807int rc;808809rc = gaudi_set_fixed_properties(hdev);810if (rc) {811dev_err(hdev->dev, "Failed setting fixed properties\n");812return rc;813}814815/* Check BAR sizes */816pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);817818if (pci_bar_size != SRAM_BAR_SIZE) {819dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",820SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);821rc = -ENODEV;822goto free_queue_props;823}824825pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);826827if (pci_bar_size != CFG_BAR_SIZE) {828dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",829CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);830rc = -ENODEV;831goto free_queue_props;832}833834prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);835hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);836837/* If FW security is enabled at this point it means no access to ELBI */838if (hdev->asic_prop.fw_security_enabled) {839hdev->asic_prop.iatu_done_by_fw = true;840841/*842* GIC-security-bit can ONLY be set by CPUCP, so in this stage843* decision can only be taken based on PCI ID security.844*/845hdev->asic_prop.gic_interrupts_enable = false;846goto pci_init;847}848849rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,850&fw_boot_status);851if (rc)852goto free_queue_props;853854/* Check whether FW is configuring iATU */855if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&856(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))857hdev->asic_prop.iatu_done_by_fw = true;858859pci_init:860rc = hl_pci_init(hdev);861if (rc)862goto free_queue_props;863864/* Before continuing in the initialization, we need to read the preboot865* version to determine whether we run with a security-enabled firmware866*/867rc = hl_fw_read_preboot_status(hdev);868if (rc) {869if (hdev->reset_on_preboot_fail)870/* we are already on failure flow, so don't check if hw_fini fails. */871hdev->asic_funcs->hw_fini(hdev, true, false);872goto pci_fini;873}874875if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {876dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");877rc = hdev->asic_funcs->hw_fini(hdev, true, false);878if (rc) {879dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);880goto pci_fini;881}882}883884return 0;885886pci_fini:887hl_pci_fini(hdev);888free_queue_props:889kfree(hdev->asic_prop.hw_queues_props);890return rc;891}892893static int gaudi_early_fini(struct hl_device *hdev)894{895kfree(hdev->asic_prop.hw_queues_props);896hl_pci_fini(hdev);897898return 0;899}900901/**902* gaudi_fetch_psoc_frequency - Fetch PSOC frequency values903*904* @hdev: pointer to hl_device structure905*906*/907static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)908{909u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;910struct asic_fixed_properties *prop = &hdev->asic_prop;911u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;912int rc;913914if ((hdev->fw_components & FW_TYPE_LINUX) &&915(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {916struct gaudi_device *gaudi = hdev->asic_specific;917918if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))919return 0;920921rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);922923if (rc)924return rc;925926freq = pll_freq_arr[2];927} else {928/* Backward compatibility */929div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);930div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);931nr = RREG32(mmPSOC_CPU_PLL_NR);932nf = RREG32(mmPSOC_CPU_PLL_NF);933od = RREG32(mmPSOC_CPU_PLL_OD);934935if (div_sel == DIV_SEL_REF_CLK ||936div_sel == DIV_SEL_DIVIDED_REF) {937if (div_sel == DIV_SEL_REF_CLK)938freq = PLL_REF_CLK;939else940freq = PLL_REF_CLK / (div_fctr + 1);941} else if (div_sel == DIV_SEL_PLL_CLK ||942div_sel == DIV_SEL_DIVIDED_PLL) {943pll_clk = PLL_REF_CLK * (nf + 1) /944((nr + 1) * (od + 1));945if (div_sel == DIV_SEL_PLL_CLK)946freq = pll_clk;947else948freq = pll_clk / (div_fctr + 1);949} else {950dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);951freq = 0;952}953}954955prop->psoc_timestamp_frequency = freq;956prop->psoc_pci_pll_nr = nr;957prop->psoc_pci_pll_nf = nf;958prop->psoc_pci_pll_od = od;959prop->psoc_pci_pll_div_factor = div_fctr;960961return 0;962}963964static int _gaudi_init_tpc_mem(struct hl_device *hdev,965dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)966{967struct asic_fixed_properties *prop = &hdev->asic_prop;968struct packet_lin_dma *init_tpc_mem_pkt;969struct hl_cs_job *job;970struct hl_cb *cb;971u64 dst_addr;972u32 cb_size, ctl;973u8 tpc_id;974int rc;975976cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);977if (!cb)978return -EFAULT;979980init_tpc_mem_pkt = cb->kernel_address;981cb_size = sizeof(*init_tpc_mem_pkt);982memset(init_tpc_mem_pkt, 0, cb_size);983984init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);985986ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);987ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);988ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);989ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);990991init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);992993init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);994995/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */996dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,997round_up(prop->sram_user_base_address, SZ_8K));998init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);9991000job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);1001if (!job) {1002dev_err(hdev->dev, "Failed to allocate a new job\n");1003rc = -ENOMEM;1004goto release_cb;1005}10061007job->id = 0;1008job->user_cb = cb;1009atomic_inc(&job->user_cb->cs_cnt);1010job->user_cb_size = cb_size;1011job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;1012job->patched_cb = job->user_cb;1013job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);10141015hl_debugfs_add_job(hdev, job);10161017rc = gaudi_send_job_on_qman0(hdev, job);10181019if (rc)1020goto free_job;10211022for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {1023rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);1024if (rc)1025break;1026}10271028free_job:1029hl_userptr_delete_list(hdev, &job->userptr_list);1030hl_debugfs_remove_job(hdev, job);1031kfree(job);1032atomic_dec(&cb->cs_cnt);10331034release_cb:1035hl_cb_put(cb);1036hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);10371038return rc;1039}10401041/*1042* gaudi_init_tpc_mem() - Initialize TPC memories.1043* @hdev: Pointer to hl_device structure.1044*1045* Copy TPC kernel fw from firmware file and run it to initialize TPC memories.1046*1047* Return: 0 for success, negative value for error.1048*/1049static int gaudi_init_tpc_mem(struct hl_device *hdev)1050{1051const struct firmware *fw;1052size_t fw_size;1053void *cpu_addr;1054dma_addr_t dma_handle;1055int rc, count = 5;10561057again:1058rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);1059if (rc == -EINTR && count-- > 0) {1060msleep(50);1061goto again;1062}10631064if (rc) {1065dev_err(hdev->dev, "Failed to load firmware file %s\n",1066GAUDI_TPC_FW_FILE);1067goto out;1068}10691070fw_size = fw->size;1071cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);1072if (!cpu_addr) {1073dev_err(hdev->dev,1074"Failed to allocate %zu of dma memory for TPC kernel\n",1075fw_size);1076rc = -ENOMEM;1077goto out;1078}10791080memcpy(cpu_addr, fw->data, fw_size);10811082rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);10831084hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);10851086out:1087release_firmware(fw);1088return rc;1089}10901091static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)1092{1093struct gaudi_device *gaudi = hdev->asic_specific;1094struct gaudi_collective_properties *prop = &gaudi->collective_props;1095struct hl_hw_queue *q;1096u32 i, sob_id, sob_group_id, queue_id;10971098/* Iterate through SOB groups and assign a SOB for each slave queue */1099sob_group_id =1100stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];1101sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;11021103queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;1104for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {1105q = &hdev->kernel_queues[queue_id + (4 * i)];1106q->sync_stream_prop.collective_sob_id = sob_id + i;1107}11081109/* Both DMA5 and TPC7 use the same resources since only a single1110* engine need to participate in the reduction process1111*/1112queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;1113q = &hdev->kernel_queues[queue_id];1114q->sync_stream_prop.collective_sob_id =1115sob_id + NIC_NUMBER_OF_ENGINES;11161117queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;1118q = &hdev->kernel_queues[queue_id];1119q->sync_stream_prop.collective_sob_id =1120sob_id + NIC_NUMBER_OF_ENGINES;1121}11221123static void gaudi_sob_group_hw_reset(struct kref *ref)1124{1125struct gaudi_hw_sob_group *hw_sob_group =1126container_of(ref, struct gaudi_hw_sob_group, kref);1127struct hl_device *hdev = hw_sob_group->hdev;1128int i;11291130for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)1131WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +1132(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);11331134kref_init(&hw_sob_group->kref);1135}11361137static void gaudi_sob_group_reset_error(struct kref *ref)1138{1139struct gaudi_hw_sob_group *hw_sob_group =1140container_of(ref, struct gaudi_hw_sob_group, kref);1141struct hl_device *hdev = hw_sob_group->hdev;11421143dev_crit(hdev->dev,1144"SOB release shouldn't be called here, base_sob_id: %d\n",1145hw_sob_group->base_sob_id);1146}11471148static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)1149{1150struct gaudi_collective_properties *prop;1151int i;11521153prop = &gaudi->collective_props;11541155memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));11561157for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)1158if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))1159prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=1160BIT(i % HL_MAX_SOBS_PER_MONITOR);1161/* Set collective engine bit */1162prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=1163BIT(i % HL_MAX_SOBS_PER_MONITOR);1164}11651166static int gaudi_collective_init(struct hl_device *hdev)1167{1168u32 i, sob_id, reserved_sobs_per_group;1169struct gaudi_collective_properties *prop;1170struct gaudi_device *gaudi;11711172gaudi = hdev->asic_specific;1173prop = &gaudi->collective_props;1174sob_id = hdev->asic_prop.collective_first_sob;11751176/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */1177reserved_sobs_per_group =1178ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);11791180/* Init SOB groups */1181for (i = 0 ; i < NUM_SOB_GROUPS; i++) {1182prop->hw_sob_group[i].hdev = hdev;1183prop->hw_sob_group[i].base_sob_id = sob_id;1184sob_id += reserved_sobs_per_group;1185gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);1186}11871188for (i = 0 ; i < QMAN_STREAMS; i++) {1189prop->next_sob_group_val[i] = 1;1190prop->curr_sob_group_idx[i] = 0;1191gaudi_collective_map_sobs(hdev, i);1192}11931194gaudi_collective_mstr_sob_mask_set(gaudi);11951196return 0;1197}11981199static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)1200{1201struct gaudi_device *gaudi = hdev->asic_specific;1202struct gaudi_collective_properties *cprop = &gaudi->collective_props;12031204kref_put(&cprop->hw_sob_group[sob_group].kref,1205gaudi_sob_group_hw_reset);1206}12071208static void gaudi_collective_master_init_job(struct hl_device *hdev,1209struct hl_cs_job *job, u32 stream, u32 sob_group_offset)1210{1211u32 master_sob_base, master_monitor, queue_id, cb_size = 0;1212struct gaudi_collective_properties *cprop;1213struct hl_gen_wait_properties wait_prop;1214struct hl_sync_stream_properties *prop;1215struct gaudi_device *gaudi;12161217gaudi = hdev->asic_specific;1218cprop = &gaudi->collective_props;1219queue_id = job->hw_queue_id;1220prop = &hdev->kernel_queues[queue_id].sync_stream_prop;12211222master_sob_base =1223cprop->hw_sob_group[sob_group_offset].base_sob_id;1224master_monitor = prop->collective_mstr_mon_id[0];12251226cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;12271228dev_dbg(hdev->dev,1229"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",1230master_sob_base, cprop->mstr_sob_mask[0],1231cprop->next_sob_group_val[stream],1232master_monitor, queue_id);12331234wait_prop.data = (void *) job->patched_cb;1235wait_prop.sob_base = master_sob_base;1236wait_prop.sob_mask = cprop->mstr_sob_mask[0];1237wait_prop.sob_val = cprop->next_sob_group_val[stream];1238wait_prop.mon_id = master_monitor;1239wait_prop.q_idx = queue_id;1240wait_prop.size = cb_size;1241cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);12421243master_sob_base += HL_MAX_SOBS_PER_MONITOR;1244master_monitor = prop->collective_mstr_mon_id[1];12451246dev_dbg(hdev->dev,1247"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",1248master_sob_base, cprop->mstr_sob_mask[1],1249cprop->next_sob_group_val[stream],1250master_monitor, queue_id);12511252wait_prop.sob_base = master_sob_base;1253wait_prop.sob_mask = cprop->mstr_sob_mask[1];1254wait_prop.mon_id = master_monitor;1255wait_prop.size = cb_size;1256cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);1257}12581259static void gaudi_collective_slave_init_job(struct hl_device *hdev,1260struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)1261{1262struct hl_gen_wait_properties wait_prop;1263struct hl_sync_stream_properties *prop;1264u32 queue_id, cb_size = 0;12651266queue_id = job->hw_queue_id;1267prop = &hdev->kernel_queues[queue_id].sync_stream_prop;12681269if (job->cs->encaps_signals) {1270/* use the encaps signal handle store earlier in the flow1271* and set the SOB information from the encaps1272* signals handle1273*/1274hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,1275cs_cmpl);12761277dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",1278job->cs->sequence,1279cs_cmpl->hw_sob->sob_id,1280cs_cmpl->sob_val);1281}12821283/* Add to wait CBs using slave monitor */1284wait_prop.data = (void *) job->user_cb;1285wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;1286wait_prop.sob_mask = 0x1;1287wait_prop.sob_val = cs_cmpl->sob_val;1288wait_prop.mon_id = prop->collective_slave_mon_id;1289wait_prop.q_idx = queue_id;1290wait_prop.size = cb_size;12911292dev_dbg(hdev->dev,1293"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",1294cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,1295prop->collective_slave_mon_id, queue_id);12961297cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);12981299dev_dbg(hdev->dev,1300"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",1301prop->collective_sob_id, queue_id);13021303cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,1304prop->collective_sob_id, cb_size, false);1305}13061307static int gaudi_collective_wait_init_cs(struct hl_cs *cs)1308{1309struct hl_cs_compl *signal_cs_cmpl =1310container_of(cs->signal_fence, struct hl_cs_compl, base_fence);1311struct hl_cs_compl *cs_cmpl =1312container_of(cs->fence, struct hl_cs_compl, base_fence);1313struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;1314struct gaudi_collective_properties *cprop;1315u32 stream, queue_id, sob_group_offset;1316struct gaudi_device *gaudi;1317struct hl_device *hdev;1318struct hl_cs_job *job;1319struct hl_ctx *ctx;13201321ctx = cs->ctx;1322hdev = ctx->hdev;1323gaudi = hdev->asic_specific;1324cprop = &gaudi->collective_props;13251326if (cs->encaps_signals) {1327cs_cmpl->hw_sob = handle->hw_sob;1328/* at this checkpoint we only need the hw_sob pointer1329* for the completion check before start going over the jobs1330* of the master/slaves, the sob_value will be taken later on1331* in gaudi_collective_slave_init_job depends on each1332* job wait offset value.1333*/1334cs_cmpl->sob_val = 0;1335} else {1336/* copy the SOB id and value of the signal CS */1337cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;1338cs_cmpl->sob_val = signal_cs_cmpl->sob_val;1339}13401341/* check again if the signal cs already completed.1342* if yes then don't send any wait cs since the hw_sob1343* could be in reset already. if signal is not completed1344* then get refcount to hw_sob to prevent resetting the sob1345* while wait cs is not submitted.1346* note that this check is protected by two locks,1347* hw queue lock and completion object lock,1348* and the same completion object lock also protects1349* the hw_sob reset handler function.1350* The hw_queue lock prevent out of sync of hw_sob1351* refcount value, changed by signal/wait flows.1352*/1353spin_lock(&signal_cs_cmpl->lock);13541355if (completion_done(&cs->signal_fence->completion)) {1356spin_unlock(&signal_cs_cmpl->lock);1357return -EINVAL;1358}1359/* Increment kref since all slave queues are now waiting on it */1360kref_get(&cs_cmpl->hw_sob->kref);13611362spin_unlock(&signal_cs_cmpl->lock);13631364/* Calculate the stream from collective master queue (1st job) */1365job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);1366stream = job->hw_queue_id % 4;1367sob_group_offset =1368stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];13691370list_for_each_entry(job, &cs->job_list, cs_node) {1371queue_id = job->hw_queue_id;13721373if (hdev->kernel_queues[queue_id].collective_mode ==1374HL_COLLECTIVE_MASTER)1375gaudi_collective_master_init_job(hdev, job, stream,1376sob_group_offset);1377else1378gaudi_collective_slave_init_job(hdev, job, cs_cmpl);1379}13801381cs_cmpl->sob_group = sob_group_offset;13821383/* Handle sob group kref and wraparound */1384kref_get(&cprop->hw_sob_group[sob_group_offset].kref);1385cprop->next_sob_group_val[stream]++;13861387if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {1388/*1389* Decrement as we reached the max value.1390* The release function won't be called here as we've1391* just incremented the refcount.1392*/1393kref_put(&cprop->hw_sob_group[sob_group_offset].kref,1394gaudi_sob_group_reset_error);1395cprop->next_sob_group_val[stream] = 1;1396/* only two SOBs are currently in use */1397cprop->curr_sob_group_idx[stream] =1398(cprop->curr_sob_group_idx[stream] + 1) &1399(HL_RSVD_SOBS - 1);14001401gaudi_collective_map_sobs(hdev, stream);14021403dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",1404cprop->curr_sob_group_idx[stream], stream);1405}14061407mb();1408hl_fence_put(cs->signal_fence);1409cs->signal_fence = NULL;14101411return 0;1412}14131414static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)1415{1416u32 cacheline_end, additional_commands;14171418cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);1419additional_commands = sizeof(struct packet_msg_prot) * 2;14201421if (user_cb_size + additional_commands > cacheline_end)1422return cacheline_end - user_cb_size + additional_commands;1423else1424return additional_commands;1425}14261427static int gaudi_collective_wait_create_job(struct hl_device *hdev,1428struct hl_ctx *ctx, struct hl_cs *cs,1429enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,1430u32 encaps_signal_offset)1431{1432struct hw_queue_properties *hw_queue_prop;1433struct hl_cs_counters_atomic *cntr;1434struct hl_cs_job *job;1435struct hl_cb *cb;1436u32 cb_size;1437bool patched_cb;14381439cntr = &hdev->aggregated_cs_counters;14401441if (mode == HL_COLLECTIVE_MASTER) {1442/* CB size of collective master queue contains1443* 4 msg short packets for monitor 1 configuration1444* 1 fence packet1445* 4 msg short packets for monitor 2 configuration1446* 1 fence packet1447* 2 msg prot packets for completion and MSI1448*/1449cb_size = sizeof(struct packet_msg_short) * 8 +1450sizeof(struct packet_fence) * 2 +1451sizeof(struct packet_msg_prot) * 2;1452patched_cb = true;1453} else {1454/* CB size of collective slave queues contains1455* 4 msg short packets for monitor configuration1456* 1 fence packet1457* 1 additional msg short packet for sob signal1458*/1459cb_size = sizeof(struct packet_msg_short) * 5 +1460sizeof(struct packet_fence);1461patched_cb = false;1462}14631464hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];1465job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);1466if (!job) {1467atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);1468atomic64_inc(&cntr->out_of_mem_drop_cnt);1469dev_err(hdev->dev, "Failed to allocate a new job\n");1470return -ENOMEM;1471}14721473/* Allocate internal mapped CB for non patched CBs */1474cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);1475if (!cb) {1476atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);1477atomic64_inc(&cntr->out_of_mem_drop_cnt);1478kfree(job);1479return -EFAULT;1480}14811482job->id = 0;1483job->cs = cs;1484job->user_cb = cb;1485atomic_inc(&job->user_cb->cs_cnt);1486job->user_cb_size = cb_size;1487job->hw_queue_id = queue_id;14881489/* since its guaranteed to have only one chunk in the collective wait1490* cs, we can use this chunk to set the encapsulated signal offset1491* in the jobs.1492*/1493if (cs->encaps_signals)1494job->encaps_sig_wait_offset = encaps_signal_offset;14951496/*1497* No need in parsing, user CB is the patched CB.1498* We call hl_cb_destroy() out of two reasons - we don't need1499* the CB in the CB idr anymore and to decrement its refcount as1500* it was incremented inside hl_cb_kernel_create().1501*/1502if (patched_cb)1503job->patched_cb = job->user_cb;1504else1505job->patched_cb = NULL;15061507job->job_cb_size = job->user_cb_size;1508hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);15091510/* increment refcount as for external queues we get completion */1511if (hw_queue_prop->type == QUEUE_TYPE_EXT)1512cs_get(cs);15131514cs->jobs_in_queue_cnt[job->hw_queue_id]++;15151516list_add_tail(&job->cs_node, &cs->job_list);15171518hl_debugfs_add_job(hdev, job);15191520return 0;1521}15221523static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,1524struct hl_ctx *ctx, struct hl_cs *cs,1525u32 wait_queue_id, u32 collective_engine_id,1526u32 encaps_signal_offset)1527{1528struct gaudi_device *gaudi = hdev->asic_specific;1529struct hw_queue_properties *hw_queue_prop;1530u32 queue_id, collective_queue, num_jobs;1531u32 stream, nic_queue, nic_idx = 0;1532bool skip;1533int i, rc = 0;15341535/* Verify wait queue id is configured as master */1536hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];1537if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {1538dev_err(hdev->dev,1539"Queue %d is not configured as collective master\n",1540wait_queue_id);1541return -EINVAL;1542}15431544/* Verify engine id is supported */1545if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&1546collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {1547dev_err(hdev->dev,1548"Collective wait does not support engine %u\n",1549collective_engine_id);1550return -EINVAL;1551}15521553stream = wait_queue_id % 4;15541555if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)1556collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;1557else1558collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;15591560num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;1561nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;15621563/* First job goes to the collective master queue, it will wait for1564* the collective slave queues to finish execution.1565* The synchronization is done using two monitors:1566* First monitor for NICs 0-7, second monitor for NICs 8-9 and the1567* reduction engine (DMA5/TPC7).1568*1569* Rest of the jobs goes to the collective slave queues which will1570* all wait for the user to signal sob 'cs_cmpl->sob_val'.1571*/1572for (i = 0 ; i < num_jobs ; i++) {1573if (i == 0) {1574queue_id = wait_queue_id;1575rc = gaudi_collective_wait_create_job(hdev, ctx, cs,1576HL_COLLECTIVE_MASTER, queue_id,1577wait_queue_id, encaps_signal_offset);1578} else {1579if (nic_idx < NIC_NUMBER_OF_ENGINES) {1580if (gaudi->hw_cap_initialized &1581BIT(HW_CAP_NIC_SHIFT + nic_idx))1582skip = false;1583else1584skip = true;15851586queue_id = nic_queue;1587nic_queue += 4;1588nic_idx++;15891590if (skip)1591continue;1592} else {1593queue_id = collective_queue;1594}15951596rc = gaudi_collective_wait_create_job(hdev, ctx, cs,1597HL_COLLECTIVE_SLAVE, queue_id,1598wait_queue_id, encaps_signal_offset);1599}16001601if (rc)1602return rc;1603}16041605return rc;1606}16071608static int gaudi_late_init(struct hl_device *hdev)1609{1610struct gaudi_device *gaudi = hdev->asic_specific;1611int rc;16121613rc = gaudi->cpucp_info_get(hdev);1614if (rc) {1615dev_err(hdev->dev, "Failed to get cpucp info\n");1616return rc;1617}16181619if ((hdev->card_type == cpucp_card_type_pci) &&1620(hdev->nic_ports_mask & 0x3)) {1621dev_info(hdev->dev,1622"PCI card detected, only 8 ports are enabled\n");1623hdev->nic_ports_mask &= ~0x3;16241625/* Stop and disable unused NIC QMANs */1626WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |1627NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |1628NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);16291630WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |1631NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |1632NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);16331634WREG32(mmNIC0_QM0_GLBL_CFG0, 0);1635WREG32(mmNIC0_QM1_GLBL_CFG0, 0);16361637gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);1638}16391640rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);1641if (rc)1642return rc;16431644/* Scrub both SRAM and DRAM */1645rc = hdev->asic_funcs->scrub_device_mem(hdev);1646if (rc)1647goto disable_pci_access;16481649rc = gaudi_fetch_psoc_frequency(hdev);1650if (rc) {1651dev_err(hdev->dev, "Failed to fetch psoc frequency\n");1652goto disable_pci_access;1653}16541655rc = gaudi_mmu_clear_pgt_range(hdev);1656if (rc) {1657dev_err(hdev->dev, "Failed to clear MMU page tables range\n");1658goto disable_pci_access;1659}16601661rc = gaudi_init_tpc_mem(hdev);1662if (rc) {1663dev_err(hdev->dev, "Failed to initialize TPC memories\n");1664goto disable_pci_access;1665}16661667rc = gaudi_collective_init(hdev);1668if (rc) {1669dev_err(hdev->dev, "Failed to init collective\n");1670goto disable_pci_access;1671}16721673/* We only support a single ASID for the user, so for the sake of optimization, just1674* initialize the ASID one time during device initialization with the fixed value of 11675*/1676gaudi_mmu_prepare(hdev, 1);16771678hl_fw_set_pll_profile(hdev);16791680return 0;16811682disable_pci_access:1683hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);16841685return rc;1686}16871688static void gaudi_late_fini(struct hl_device *hdev)1689{1690hl_hwmon_release_resources(hdev);1691}16921693static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)1694{1695dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;1696void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};1697int i, j, rc = 0;16981699/*1700* The device CPU works with 40-bits addresses, while bit 39 must be set1701* to '1' when accessing the host.1702* Bits 49:39 of the full host address are saved for a later1703* configuration of the HW to perform extension to 50 bits.1704* Because there is a single HW register that holds the extension bits,1705* these bits must be identical in all allocated range.1706*/17071708for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {1709virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,1710&dma_addr_arr[i],1711GFP_KERNEL | __GFP_ZERO);1712if (!virt_addr_arr[i]) {1713rc = -ENOMEM;1714goto free_dma_mem_arr;1715}17161717end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;1718if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==1719GAUDI_CPU_PCI_MSB_ADDR(end_addr))1720break;1721}17221723if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {1724dev_err(hdev->dev,1725"MSB of CPU accessible DMA memory are not identical in all range\n");1726rc = -EFAULT;1727goto free_dma_mem_arr;1728}17291730hdev->cpu_accessible_dma_mem = virt_addr_arr[i];1731hdev->cpu_accessible_dma_address = dma_addr_arr[i];1732hdev->cpu_pci_msb_addr =1733GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);17341735if (!hdev->asic_prop.fw_security_enabled)1736GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);17371738free_dma_mem_arr:1739for (j = 0 ; j < i ; j++)1740hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],1741dma_addr_arr[j]);17421743return rc;1744}17451746static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)1747{1748struct gaudi_device *gaudi = hdev->asic_specific;1749struct gaudi_internal_qman_info *q;1750u32 i;17511752for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {1753q = &gaudi->internal_qmans[i];1754if (!q->pq_kernel_addr)1755continue;1756hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);1757}1758}17591760static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)1761{1762struct gaudi_device *gaudi = hdev->asic_specific;1763struct gaudi_internal_qman_info *q;1764int rc, i;17651766for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {1767if (gaudi_queue_type[i] != QUEUE_TYPE_INT)1768continue;17691770q = &gaudi->internal_qmans[i];17711772switch (i) {1773case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:1774q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;1775break;1776case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:1777q->pq_size = MME_QMAN_SIZE_IN_BYTES;1778break;1779case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:1780q->pq_size = TPC_QMAN_SIZE_IN_BYTES;1781break;1782case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:1783q->pq_size = NIC_QMAN_SIZE_IN_BYTES;1784break;1785default:1786dev_err(hdev->dev, "Bad internal queue index %d", i);1787rc = -EINVAL;1788goto free_internal_qmans_pq_mem;1789}17901791q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,1792GFP_KERNEL | __GFP_ZERO);1793if (!q->pq_kernel_addr) {1794rc = -ENOMEM;1795goto free_internal_qmans_pq_mem;1796}1797}17981799return 0;18001801free_internal_qmans_pq_mem:1802gaudi_free_internal_qmans_pq_mem(hdev);1803return rc;1804}18051806static void gaudi_set_pci_memory_regions(struct hl_device *hdev)1807{1808struct asic_fixed_properties *prop = &hdev->asic_prop;1809struct pci_mem_region *region;18101811/* CFG */1812region = &hdev->pci_mem_region[PCI_REGION_CFG];1813region->region_base = CFG_BASE;1814region->region_size = CFG_SIZE;1815region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;1816region->bar_size = CFG_BAR_SIZE;1817region->bar_id = CFG_BAR_ID;1818region->used = 1;18191820/* SRAM */1821region = &hdev->pci_mem_region[PCI_REGION_SRAM];1822region->region_base = SRAM_BASE_ADDR;1823region->region_size = SRAM_SIZE;1824region->offset_in_bar = 0;1825region->bar_size = SRAM_BAR_SIZE;1826region->bar_id = SRAM_BAR_ID;1827region->used = 1;18281829/* DRAM */1830region = &hdev->pci_mem_region[PCI_REGION_DRAM];1831region->region_base = DRAM_PHYS_BASE;1832region->region_size = hdev->asic_prop.dram_size;1833region->offset_in_bar = 0;1834region->bar_size = prop->dram_pci_bar_size;1835region->bar_id = HBM_BAR_ID;1836region->used = 1;18371838/* SP SRAM */1839region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];1840region->region_base = PSOC_SCRATCHPAD_ADDR;1841region->region_size = PSOC_SCRATCHPAD_SIZE;1842region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;1843region->bar_size = CFG_BAR_SIZE;1844region->bar_id = CFG_BAR_ID;1845region->used = 1;1846}18471848static int gaudi_sw_init(struct hl_device *hdev)1849{1850struct gaudi_device *gaudi;1851u32 i, event_id = 0;1852int rc;18531854/* Allocate device structure */1855gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);1856if (!gaudi)1857return -ENOMEM;18581859for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {1860if (gaudi_irq_map_table[i].valid) {1861if (event_id == GAUDI_EVENT_SIZE) {1862dev_err(hdev->dev,1863"Event array exceeds the limit of %u events\n",1864GAUDI_EVENT_SIZE);1865rc = -EINVAL;1866goto free_gaudi_device;1867}18681869gaudi->events[event_id++] =1870gaudi_irq_map_table[i].fc_id;1871}1872}18731874gaudi->cpucp_info_get = gaudi_cpucp_info_get;18751876hdev->asic_specific = gaudi;18771878/* Create DMA pool for small allocations */1879hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),1880&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);1881if (!hdev->dma_pool) {1882dev_err(hdev->dev, "failed to create DMA pool\n");1883rc = -ENOMEM;1884goto free_gaudi_device;1885}18861887rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);1888if (rc)1889goto free_dma_pool;18901891hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);1892if (!hdev->cpu_accessible_dma_pool) {1893dev_err(hdev->dev,1894"Failed to create CPU accessible DMA pool\n");1895rc = -ENOMEM;1896goto free_cpu_dma_mem;1897}18981899rc = gen_pool_add(hdev->cpu_accessible_dma_pool,1900(uintptr_t) hdev->cpu_accessible_dma_mem,1901HL_CPU_ACCESSIBLE_MEM_SIZE, -1);1902if (rc) {1903dev_err(hdev->dev,1904"Failed to add memory to CPU accessible DMA pool\n");1905rc = -EFAULT;1906goto free_cpu_accessible_dma_pool;1907}19081909rc = gaudi_alloc_internal_qmans_pq_mem(hdev);1910if (rc)1911goto free_cpu_accessible_dma_pool;19121913spin_lock_init(&gaudi->hw_queues_lock);19141915hdev->supports_sync_stream = true;1916hdev->supports_coresight = true;1917hdev->supports_staged_submission = true;1918hdev->supports_wait_for_multi_cs = true;19191920hdev->asic_funcs->set_pci_memory_regions(hdev);1921hdev->stream_master_qid_arr =1922hdev->asic_funcs->get_stream_master_qid_arr();1923hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;19241925return 0;19261927free_cpu_accessible_dma_pool:1928gen_pool_destroy(hdev->cpu_accessible_dma_pool);1929free_cpu_dma_mem:1930if (!hdev->asic_prop.fw_security_enabled)1931GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,1932hdev->cpu_pci_msb_addr);1933hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,1934hdev->cpu_accessible_dma_address);1935free_dma_pool:1936dma_pool_destroy(hdev->dma_pool);1937free_gaudi_device:1938kfree(gaudi);1939return rc;1940}19411942static int gaudi_sw_fini(struct hl_device *hdev)1943{1944struct gaudi_device *gaudi = hdev->asic_specific;19451946gaudi_free_internal_qmans_pq_mem(hdev);19471948gen_pool_destroy(hdev->cpu_accessible_dma_pool);19491950if (!hdev->asic_prop.fw_security_enabled)1951GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,1952hdev->cpu_pci_msb_addr);19531954hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,1955hdev->cpu_accessible_dma_address);19561957dma_pool_destroy(hdev->dma_pool);19581959kfree(gaudi);19601961return 0;1962}19631964static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)1965{1966struct hl_device *hdev = arg;1967int i;19681969if (hdev->disabled)1970return IRQ_HANDLED;19711972for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)1973hl_irq_handler_cq(irq, &hdev->completion_queue[i]);19741975hl_irq_handler_eq(irq, &hdev->event_queue);19761977return IRQ_HANDLED;1978}19791980/*1981* For backward compatibility, new MSI interrupts should be set after the1982* existing CPU and NIC interrupts.1983*/1984static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,1985bool cpu_eq)1986{1987int msi_vec;19881989if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))1990dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",1991GAUDI_EVENT_QUEUE_MSI_IDX);19921993msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :1994(nr + NIC_NUMBER_OF_ENGINES + 1);19951996return pci_irq_vector(hdev->pdev, msi_vec);1997}19981999static int gaudi_enable_msi_single(struct hl_device *hdev)2000{2001int rc, irq;20022003dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");20042005irq = gaudi_pci_irq_vector(hdev, 0, false);2006rc = request_irq(irq, gaudi_irq_handler_single, 0,2007"gaudi single msi", hdev);2008if (rc)2009dev_err(hdev->dev,2010"Failed to request single MSI IRQ\n");20112012return rc;2013}20142015static int gaudi_enable_msi(struct hl_device *hdev)2016{2017struct gaudi_device *gaudi = hdev->asic_specific;2018int rc;20192020if (gaudi->hw_cap_initialized & HW_CAP_MSI)2021return 0;20222023rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);2024if (rc < 0) {2025dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);2026return rc;2027}20282029rc = gaudi_enable_msi_single(hdev);2030if (rc)2031goto free_pci_irq_vectors;20322033gaudi->hw_cap_initialized |= HW_CAP_MSI;20342035return 0;20362037free_pci_irq_vectors:2038pci_free_irq_vectors(hdev->pdev);2039return rc;2040}20412042static void gaudi_sync_irqs(struct hl_device *hdev)2043{2044struct gaudi_device *gaudi = hdev->asic_specific;20452046if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))2047return;20482049/* Wait for all pending IRQs to be finished */2050synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));2051}20522053static void gaudi_disable_msi(struct hl_device *hdev)2054{2055struct gaudi_device *gaudi = hdev->asic_specific;20562057if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))2058return;20592060gaudi_sync_irqs(hdev);2061free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);2062pci_free_irq_vectors(hdev->pdev);20632064gaudi->hw_cap_initialized &= ~HW_CAP_MSI;2065}20662067static void gaudi_init_scrambler_sram(struct hl_device *hdev)2068{2069struct gaudi_device *gaudi = hdev->asic_specific;20702071if (hdev->asic_prop.fw_security_enabled)2072return;20732074if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &2075CPU_BOOT_DEV_STS0_SRAM_SCR_EN)2076return;20772078if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)2079return;20802081WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,20821 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2083WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,20841 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2085WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,20861 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2087WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,20881 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2089WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,20901 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2091WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,20921 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2093WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,20941 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2095WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,20961 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);20972098WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,20991 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2100WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,21011 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2102WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,21031 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2104WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,21051 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2106WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,21071 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2108WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,21091 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2110WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,21111 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);2112WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,21131 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);21142115WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,21161 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2117WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,21181 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2119WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,21201 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2121WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,21221 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2123WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,21241 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2125WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,21261 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2127WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,21281 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);2129WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,21301 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);21312132gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;2133}21342135static void gaudi_init_scrambler_hbm(struct hl_device *hdev)2136{2137struct gaudi_device *gaudi = hdev->asic_specific;21382139if (hdev->asic_prop.fw_security_enabled)2140return;21412142if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &2143CPU_BOOT_DEV_STS0_DRAM_SCR_EN)2144return;21452146if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)2147return;21482149WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,21501 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2151WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,21521 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2153WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,21541 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2155WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,21561 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2157WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,21581 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2159WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,21601 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2161WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,21621 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2163WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,21641 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);21652166WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,21671 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2168WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,21691 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2170WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,21711 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2172WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,21731 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2174WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,21751 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2176WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,21771 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2178WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,21791 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);2180WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,21811 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);21822183WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,21841 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2185WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,21861 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2187WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,21881 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2189WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,21901 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2191WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,21921 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2193WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,21941 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2195WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,21961 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);2197WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,21981 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);21992200gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;2201}22022203static void gaudi_init_e2e(struct hl_device *hdev)2204{2205if (hdev->asic_prop.fw_security_enabled)2206return;22072208if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &2209CPU_BOOT_DEV_STS0_E2E_CRED_EN)2210return;22112212WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);2213WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);2214WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);2215WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);22162217WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);2218WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);2219WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);2220WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);22212222WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);2223WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);2224WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);2225WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);22262227WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);2228WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);2229WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);2230WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);22312232WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);2233WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);2234WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);2235WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);22362237WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);2238WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);2239WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);2240WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);22412242WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);2243WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);2244WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);2245WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);22462247WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);2248WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);2249WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);2250WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);22512252WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);2253WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);2254WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);2255WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);22562257WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);2258WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);2259WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);2260WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);22612262WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);2263WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);2264WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);2265WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);22662267WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);2268WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);2269WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);2270WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);22712272WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);2273WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);2274WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);2275WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);22762277WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);2278WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);2279WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);2280WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);22812282WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);2283WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);2284WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);2285WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);22862287WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);2288WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);2289WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);2290WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);22912292WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);2293WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);2294WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);2295WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);22962297WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);2298WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);2299WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);2300WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);23012302WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);2303WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);2304WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);2305WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);23062307WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);2308WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);2309WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);2310WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);23112312WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);2313WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);2314WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);2315WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);23162317WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);2318WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);2319WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);2320WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);23212322WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);2323WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);2324WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);2325WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);23262327WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);2328WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);2329WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);2330WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);23312332WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,23331 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2334WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,23351 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23362337WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,23381 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2339WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,23401 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23412342WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,23431 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2344WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,23451 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23462347WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,23481 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2349WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,23501 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23512352WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,23531 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2354WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,23551 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23562357WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,23581 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2359WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,23601 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23612362WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,23631 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2364WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,23651 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23662367WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,23681 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2369WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,23701 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23712372WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,23731 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2374WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,23751 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23762377WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,23781 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2379WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,23801 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23812382WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,23831 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2384WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,23851 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23862387WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,23881 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2389WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,23901 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23912392WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,23931 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2394WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,23951 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);23962397WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,23981 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2399WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,24001 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);24012402WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,24031 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2404WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,24051 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);24062407WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,24081 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);2409WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,24101 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);24112412WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,24131 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2414WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,24151 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24162417WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,24181 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2419WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,24201 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24212422WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,24231 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2424WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,24251 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24262427WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,24281 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2429WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,24301 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24312432WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,24331 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2434WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,24351 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24362437WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,24381 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2439WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,24401 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24412442WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,24431 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2444WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,24451 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);24462447WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,24481 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);2449WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,24501 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);2451}24522453static void gaudi_init_hbm_cred(struct hl_device *hdev)2454{2455u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;24562457if (hdev->asic_prop.fw_security_enabled)2458return;24592460if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &2461CPU_BOOT_DEV_STS0_HBM_CRED_EN)2462return;24632464hbm0_wr = 0x33333333;2465hbm0_rd = 0x77777777;2466hbm1_wr = 0x55555555;2467hbm1_rd = 0xDDDDDDDD;24682469WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);2470WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);2471WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);2472WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);24732474WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);2475WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);2476WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);2477WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);24782479WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);2480WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);2481WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);2482WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);24832484WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);2485WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);2486WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);2487WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);24882489WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,2490(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2491(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2492WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,2493(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2494(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2495WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,2496(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2497(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2498WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,2499(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2500(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));25012502WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,2503(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2504(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2505WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,2506(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2507(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2508WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,2509(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2510(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2511WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,2512(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |2513(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));2514}25152516static void gaudi_init_golden_registers(struct hl_device *hdev)2517{2518u32 tpc_offset;2519int tpc_id, i;25202521gaudi_init_e2e(hdev);2522gaudi_init_hbm_cred(hdev);25232524for (tpc_id = 0, tpc_offset = 0;2525tpc_id < TPC_NUMBER_OF_ENGINES;2526tpc_id++, tpc_offset += TPC_CFG_OFFSET) {2527/* Mask all arithmetic interrupts from TPC */2528WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);2529/* Set 16 cache lines */2530WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,2531ICACHE_FETCH_LINE_NUM, 2);2532}25332534/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */2535for (i = 0 ; i < 128 ; i += 8)2536writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);25372538WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);2539WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);2540WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);2541WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);2542}25432544static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,2545int qman_id, dma_addr_t qman_pq_addr)2546{2547struct cpu_dyn_regs *dyn_regs =2548&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;2549u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;2550u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;2551u32 q_off, dma_qm_offset;2552u32 dma_qm_err_cfg, irq_handler_offset;25532554dma_qm_offset = dma_id * DMA_QMAN_OFFSET;25552556mtr_base_en_lo = lower_32_bits(CFG_BASE +2557mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2558mtr_base_en_hi = upper_32_bits(CFG_BASE +2559mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2560so_base_en_lo = lower_32_bits(CFG_BASE +2561mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);2562so_base_en_hi = upper_32_bits(CFG_BASE +2563mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);2564mtr_base_ws_lo = lower_32_bits(CFG_BASE +2565mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2566mtr_base_ws_hi = upper_32_bits(CFG_BASE +2567mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2568so_base_ws_lo = lower_32_bits(CFG_BASE +2569mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);2570so_base_ws_hi = upper_32_bits(CFG_BASE +2571mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);25722573q_off = dma_qm_offset + qman_id * 4;25742575WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));2576WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));25772578WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));2579WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);2580WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);25812582WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);2583WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,2584QMAN_LDMA_SRC_OFFSET);2585WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,2586QMAN_LDMA_DST_OFFSET);25872588WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);2589WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);2590WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);2591WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);2592WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);2593WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);2594WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);2595WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);25962597WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);25982599/* The following configuration is needed only once per QMAN */2600if (qman_id == 0) {2601irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?2602mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :2603le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);26042605/* Configure RAZWI IRQ */2606dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;2607if (hdev->stop_on_err)2608dma_qm_err_cfg |=2609PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;26102611WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);26122613WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,2614lower_32_bits(CFG_BASE + irq_handler_offset));2615WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,2616upper_32_bits(CFG_BASE + irq_handler_offset));26172618WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,2619gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +2620dma_id);26212622WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,2623QM_ARB_ERR_MSG_EN_MASK);26242625/* Set timeout to maximum */2626WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);26272628WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,2629QMAN_EXTERNAL_MAKE_TRUSTED);26302631WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);2632}2633}26342635static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)2636{2637struct cpu_dyn_regs *dyn_regs =2638&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;2639u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;2640u32 dma_offset = dma_id * DMA_CORE_OFFSET;2641u32 irq_handler_offset;26422643/* Set to maximum possible according to physical size */2644WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);2645WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);26462647/* WA for H/W bug H3-2116 */2648WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);26492650/* STOP_ON bit implies no completion to operation in case of RAZWI */2651if (hdev->stop_on_err)2652dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;26532654WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);26552656irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?2657mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :2658le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);26592660WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,2661lower_32_bits(CFG_BASE + irq_handler_offset));2662WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,2663upper_32_bits(CFG_BASE + irq_handler_offset));26642665WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,2666gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);2667WREG32(mmDMA0_CORE_PROT + dma_offset,26681 << DMA0_CORE_PROT_ERR_VAL_SHIFT);2669/* If the channel is secured, it should be in MMU bypass mode */2670WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,26711 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);2672WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);2673}26742675static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,2676u32 enable_mask)2677{2678u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;26792680WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);2681}26822683static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)2684{2685struct gaudi_device *gaudi = hdev->asic_specific;2686struct hl_hw_queue *q;2687int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;26882689if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)2690return;26912692for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {2693dma_id = gaudi_dma_assignment[i];2694/*2695* For queues after the CPU Q need to add 1 to get the correct2696* queue. In addition, need to add the CPU EQ and NIC IRQs in2697* order to get the correct MSI register.2698*/2699if (dma_id > 1) {2700cpu_skip = 1;2701nic_skip = NIC_NUMBER_OF_ENGINES;2702} else {2703cpu_skip = 0;2704nic_skip = 0;2705}27062707for (j = 0 ; j < QMAN_STREAMS ; j++) {2708q_idx = 4 * dma_id + j + cpu_skip;2709q = &hdev->kernel_queues[q_idx];2710q->cq_id = cq_id++;2711q->msi_vec = nic_skip + cpu_skip + msi_vec++;2712gaudi_init_pci_dma_qman(hdev, dma_id, j,2713q->bus_address);2714}27152716gaudi_init_dma_core(hdev, dma_id);27172718gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);2719}27202721gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;2722}27232724static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,2725int qman_id, u64 qman_base_addr)2726{2727struct cpu_dyn_regs *dyn_regs =2728&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;2729u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;2730u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;2731u32 dma_qm_err_cfg, irq_handler_offset;2732u32 q_off, dma_qm_offset;27332734dma_qm_offset = dma_id * DMA_QMAN_OFFSET;27352736mtr_base_en_lo = lower_32_bits(CFG_BASE +2737mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2738mtr_base_en_hi = upper_32_bits(CFG_BASE +2739mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2740so_base_en_lo = lower_32_bits(CFG_BASE +2741mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);2742so_base_en_hi = upper_32_bits(CFG_BASE +2743mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);2744mtr_base_ws_lo = lower_32_bits(CFG_BASE +2745mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2746mtr_base_ws_hi = upper_32_bits(CFG_BASE +2747mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2748so_base_ws_lo = lower_32_bits(CFG_BASE +2749mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);2750so_base_ws_hi = upper_32_bits(CFG_BASE +2751mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);27522753q_off = dma_qm_offset + qman_id * 4;27542755if (qman_id < 4) {2756WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,2757lower_32_bits(qman_base_addr));2758WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,2759upper_32_bits(qman_base_addr));27602761WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));2762WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);2763WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);27642765WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,2766QMAN_CPDMA_SIZE_OFFSET);2767WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,2768QMAN_CPDMA_SRC_OFFSET);2769WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,2770QMAN_CPDMA_DST_OFFSET);2771} else {2772irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?2773mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :2774le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);27752776WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,2777QMAN_LDMA_SIZE_OFFSET);2778WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,2779QMAN_LDMA_SRC_OFFSET);2780WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,2781QMAN_LDMA_DST_OFFSET);27822783/* Configure RAZWI IRQ */2784dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;2785if (hdev->stop_on_err)2786dma_qm_err_cfg |=2787HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;27882789WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);27902791WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,2792lower_32_bits(CFG_BASE + irq_handler_offset));2793WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,2794upper_32_bits(CFG_BASE + irq_handler_offset));27952796WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,2797gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +2798dma_id);27992800WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,2801QM_ARB_ERR_MSG_EN_MASK);28022803/* Set timeout to maximum */2804WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);28052806WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);2807WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,2808QMAN_INTERNAL_MAKE_TRUSTED);2809}28102811WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);2812WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);2813WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);2814WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);28152816/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */2817if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {2818WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,2819mtr_base_ws_lo);2820WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,2821mtr_base_ws_hi);2822WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,2823so_base_ws_lo);2824WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,2825so_base_ws_hi);2826}2827}28282829static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)2830{2831struct gaudi_device *gaudi = hdev->asic_specific;2832struct gaudi_internal_qman_info *q;2833u64 qman_base_addr;2834int i, j, dma_id, internal_q_index;28352836if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)2837return;28382839for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {2840dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];28412842for (j = 0 ; j < QMAN_STREAMS ; j++) {2843/*2844* Add the CPU queue in order to get the correct queue2845* number as all internal queue are placed after it2846*/2847internal_q_index = dma_id * QMAN_STREAMS + j + 1;28482849q = &gaudi->internal_qmans[internal_q_index];2850qman_base_addr = (u64) q->pq_dma_addr;2851gaudi_init_hbm_dma_qman(hdev, dma_id, j,2852qman_base_addr);2853}28542855/* Initializing lower CP for HBM DMA QMAN */2856gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);28572858gaudi_init_dma_core(hdev, dma_id);28592860gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);2861}28622863gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;2864}28652866static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,2867int qman_id, u64 qman_base_addr)2868{2869struct cpu_dyn_regs *dyn_regs =2870&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;2871u32 mtr_base_lo, mtr_base_hi;2872u32 so_base_lo, so_base_hi;2873u32 irq_handler_offset;2874u32 q_off, mme_id;2875u32 mme_qm_err_cfg;28762877mtr_base_lo = lower_32_bits(CFG_BASE +2878mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2879mtr_base_hi = upper_32_bits(CFG_BASE +2880mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);2881so_base_lo = lower_32_bits(CFG_BASE +2882mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);2883so_base_hi = upper_32_bits(CFG_BASE +2884mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);28852886q_off = mme_offset + qman_id * 4;28872888if (qman_id < 4) {2889WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,2890lower_32_bits(qman_base_addr));2891WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,2892upper_32_bits(qman_base_addr));28932894WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));2895WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);2896WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);28972898WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,2899QMAN_CPDMA_SIZE_OFFSET);2900WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,2901QMAN_CPDMA_SRC_OFFSET);2902WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,2903QMAN_CPDMA_DST_OFFSET);2904} else {2905irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?2906mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :2907le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);29082909WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,2910QMAN_LDMA_SIZE_OFFSET);2911WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,2912QMAN_LDMA_SRC_OFFSET);2913WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,2914QMAN_LDMA_DST_OFFSET);29152916/* Configure RAZWI IRQ */2917mme_id = mme_offset /2918(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;29192920mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;2921if (hdev->stop_on_err)2922mme_qm_err_cfg |=2923MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;29242925WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);29262927WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,2928lower_32_bits(CFG_BASE + irq_handler_offset));2929WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,2930upper_32_bits(CFG_BASE + irq_handler_offset));29312932WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,2933gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +2934mme_id);29352936WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,2937QM_ARB_ERR_MSG_EN_MASK);29382939/* Set timeout to maximum */2940WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);29412942WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);2943WREG32(mmMME0_QM_GLBL_PROT + mme_offset,2944QMAN_INTERNAL_MAKE_TRUSTED);2945}29462947WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);2948WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);2949WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);2950WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);2951}29522953static void gaudi_init_mme_qmans(struct hl_device *hdev)2954{2955struct gaudi_device *gaudi = hdev->asic_specific;2956struct gaudi_internal_qman_info *q;2957u64 qman_base_addr;2958u32 mme_offset;2959int i, internal_q_index;29602961if (gaudi->hw_cap_initialized & HW_CAP_MME)2962return;29632964/*2965* map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)2966* and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)2967*/29682969mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;29702971for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {2972internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;2973q = &gaudi->internal_qmans[internal_q_index];2974qman_base_addr = (u64) q->pq_dma_addr;2975gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),2976qman_base_addr);2977if (i == 3)2978mme_offset = 0;2979}29802981/* Initializing lower CP for MME QMANs */2982mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;2983gaudi_init_mme_qman(hdev, mme_offset, 4, 0);2984gaudi_init_mme_qman(hdev, 0, 4, 0);29852986WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);2987WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);29882989gaudi->hw_cap_initialized |= HW_CAP_MME;2990}29912992static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,2993int qman_id, u64 qman_base_addr)2994{2995struct cpu_dyn_regs *dyn_regs =2996&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;2997u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;2998u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;2999u32 tpc_qm_err_cfg, irq_handler_offset;3000u32 q_off, tpc_id;30013002mtr_base_en_lo = lower_32_bits(CFG_BASE +3003mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3004mtr_base_en_hi = upper_32_bits(CFG_BASE +3005mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3006so_base_en_lo = lower_32_bits(CFG_BASE +3007mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);3008so_base_en_hi = upper_32_bits(CFG_BASE +3009mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);3010mtr_base_ws_lo = lower_32_bits(CFG_BASE +3011mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3012mtr_base_ws_hi = upper_32_bits(CFG_BASE +3013mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3014so_base_ws_lo = lower_32_bits(CFG_BASE +3015mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);3016so_base_ws_hi = upper_32_bits(CFG_BASE +3017mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);30183019q_off = tpc_offset + qman_id * 4;30203021tpc_id = tpc_offset /3022(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);30233024if (qman_id < 4) {3025WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,3026lower_32_bits(qman_base_addr));3027WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,3028upper_32_bits(qman_base_addr));30293030WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));3031WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);3032WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);30333034WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,3035QMAN_CPDMA_SIZE_OFFSET);3036WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,3037QMAN_CPDMA_SRC_OFFSET);3038WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,3039QMAN_CPDMA_DST_OFFSET);3040} else {3041irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?3042mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :3043le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);30443045WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,3046QMAN_LDMA_SIZE_OFFSET);3047WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,3048QMAN_LDMA_SRC_OFFSET);3049WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,3050QMAN_LDMA_DST_OFFSET);30513052/* Configure RAZWI IRQ */3053tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;3054if (hdev->stop_on_err)3055tpc_qm_err_cfg |=3056TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;30573058WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);30593060WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,3061lower_32_bits(CFG_BASE + irq_handler_offset));3062WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,3063upper_32_bits(CFG_BASE + irq_handler_offset));30643065WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,3066gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +3067tpc_id);30683069WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,3070QM_ARB_ERR_MSG_EN_MASK);30713072/* Set timeout to maximum */3073WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);30743075WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);3076WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,3077QMAN_INTERNAL_MAKE_TRUSTED);3078}30793080WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);3081WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);3082WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);3083WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);30843085/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */3086if (tpc_id == 6) {3087WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,3088mtr_base_ws_lo);3089WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,3090mtr_base_ws_hi);3091WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,3092so_base_ws_lo);3093WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,3094so_base_ws_hi);3095}3096}30973098static void gaudi_init_tpc_qmans(struct hl_device *hdev)3099{3100struct gaudi_device *gaudi = hdev->asic_specific;3101struct gaudi_internal_qman_info *q;3102u64 qman_base_addr;3103u32 so_base_hi, tpc_offset = 0;3104u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -3105mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;3106int i, tpc_id, internal_q_index;31073108if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)3109return;31103111so_base_hi = upper_32_bits(CFG_BASE +3112mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);31133114for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {3115for (i = 0 ; i < QMAN_STREAMS ; i++) {3116internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +3117tpc_id * QMAN_STREAMS + i;3118q = &gaudi->internal_qmans[internal_q_index];3119qman_base_addr = (u64) q->pq_dma_addr;3120gaudi_init_tpc_qman(hdev, tpc_offset, i,3121qman_base_addr);31223123if (i == 3) {3124/* Initializing lower CP for TPC QMAN */3125gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);31263127/* Enable the QMAN and TPC channel */3128WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,3129QMAN_TPC_ENABLE);3130}3131}31323133WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,3134so_base_hi);31353136tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;31373138gaudi->hw_cap_initialized |=3139FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);3140}3141}31423143static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,3144int qman_id, u64 qman_base_addr, int nic_id)3145{3146struct cpu_dyn_regs *dyn_regs =3147&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;3148u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;3149u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;3150u32 nic_qm_err_cfg, irq_handler_offset;3151u32 q_off;31523153mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +3154mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3155mtr_base_en_hi = upper_32_bits(CFG_BASE +3156mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3157so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +3158mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);3159so_base_en_hi = upper_32_bits(CFG_BASE +3160mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);3161mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +3162mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3163mtr_base_ws_hi = upper_32_bits(CFG_BASE +3164mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);3165so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +3166mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);3167so_base_ws_hi = upper_32_bits(CFG_BASE +3168mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);31693170q_off = nic_offset + qman_id * 4;31713172WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));3173WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));31743175WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));3176WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);3177WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);31783179WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,3180QMAN_LDMA_SIZE_OFFSET);3181WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,3182QMAN_LDMA_SRC_OFFSET);3183WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,3184QMAN_LDMA_DST_OFFSET);31853186WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);3187WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);3188WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);3189WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);31903191/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */3192WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);3193WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);3194WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);3195WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);31963197if (qman_id == 0) {3198irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?3199mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :3200le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);32013202/* Configure RAZWI IRQ */3203nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;3204if (hdev->stop_on_err)3205nic_qm_err_cfg |=3206NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;32073208WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);32093210WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,3211lower_32_bits(CFG_BASE + irq_handler_offset));3212WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,3213upper_32_bits(CFG_BASE + irq_handler_offset));32143215WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,3216gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +3217nic_id);32183219WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,3220QM_ARB_ERR_MSG_EN_MASK);32213222/* Set timeout to maximum */3223WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);32243225WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);3226WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,3227QMAN_INTERNAL_MAKE_TRUSTED);3228}3229}32303231static void gaudi_init_nic_qmans(struct hl_device *hdev)3232{3233struct gaudi_device *gaudi = hdev->asic_specific;3234struct gaudi_internal_qman_info *q;3235u64 qman_base_addr;3236u32 nic_offset = 0;3237u32 nic_delta_between_qmans =3238mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;3239u32 nic_delta_between_nics =3240mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;3241int i, nic_id, internal_q_index;32423243if (!hdev->nic_ports_mask)3244return;32453246if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)3247return;32483249dev_dbg(hdev->dev, "Initializing NIC QMANs\n");32503251for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {3252if (!(hdev->nic_ports_mask & (1 << nic_id))) {3253nic_offset += nic_delta_between_qmans;3254if (nic_id & 1) {3255nic_offset -= (nic_delta_between_qmans * 2);3256nic_offset += nic_delta_between_nics;3257}3258continue;3259}32603261for (i = 0 ; i < QMAN_STREAMS ; i++) {3262internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +3263nic_id * QMAN_STREAMS + i;3264q = &gaudi->internal_qmans[internal_q_index];3265qman_base_addr = (u64) q->pq_dma_addr;3266gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),3267qman_base_addr, nic_id);3268}32693270/* Enable the QMAN */3271WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);32723273nic_offset += nic_delta_between_qmans;3274if (nic_id & 1) {3275nic_offset -= (nic_delta_between_qmans * 2);3276nic_offset += nic_delta_between_nics;3277}32783279gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);3280}3281}32823283static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)3284{3285struct gaudi_device *gaudi = hdev->asic_specific;32863287if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))3288return;32893290WREG32(mmDMA0_QM_GLBL_CFG0, 0);3291WREG32(mmDMA1_QM_GLBL_CFG0, 0);3292WREG32(mmDMA5_QM_GLBL_CFG0, 0);3293}32943295static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)3296{3297struct gaudi_device *gaudi = hdev->asic_specific;32983299if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))3300return;33013302WREG32(mmDMA2_QM_GLBL_CFG0, 0);3303WREG32(mmDMA3_QM_GLBL_CFG0, 0);3304WREG32(mmDMA4_QM_GLBL_CFG0, 0);3305WREG32(mmDMA6_QM_GLBL_CFG0, 0);3306WREG32(mmDMA7_QM_GLBL_CFG0, 0);3307}33083309static void gaudi_disable_mme_qmans(struct hl_device *hdev)3310{3311struct gaudi_device *gaudi = hdev->asic_specific;33123313if (!(gaudi->hw_cap_initialized & HW_CAP_MME))3314return;33153316WREG32(mmMME2_QM_GLBL_CFG0, 0);3317WREG32(mmMME0_QM_GLBL_CFG0, 0);3318}33193320static void gaudi_disable_tpc_qmans(struct hl_device *hdev)3321{3322struct gaudi_device *gaudi = hdev->asic_specific;3323u32 tpc_offset = 0;3324int tpc_id;33253326if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))3327return;33283329for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {3330WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);3331tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;3332}3333}33343335static void gaudi_disable_nic_qmans(struct hl_device *hdev)3336{3337struct gaudi_device *gaudi = hdev->asic_specific;3338u32 nic_mask, nic_offset = 0;3339u32 nic_delta_between_qmans =3340mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;3341u32 nic_delta_between_nics =3342mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;3343int nic_id;33443345for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {3346nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);33473348if (gaudi->hw_cap_initialized & nic_mask)3349WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);33503351nic_offset += nic_delta_between_qmans;3352if (nic_id & 1) {3353nic_offset -= (nic_delta_between_qmans * 2);3354nic_offset += nic_delta_between_nics;3355}3356}3357}33583359static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)3360{3361struct gaudi_device *gaudi = hdev->asic_specific;33623363if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))3364return;33653366/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */3367WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3368WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3369WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3370}33713372static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)3373{3374struct gaudi_device *gaudi = hdev->asic_specific;33753376if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))3377return;33783379/* Stop CPs of HBM DMA QMANs */33803381WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3382WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3383WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3384WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3385WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);3386}33873388static void gaudi_stop_mme_qmans(struct hl_device *hdev)3389{3390struct gaudi_device *gaudi = hdev->asic_specific;33913392if (!(gaudi->hw_cap_initialized & HW_CAP_MME))3393return;33943395/* Stop CPs of MME QMANs */3396WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);3397WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);3398}33993400static void gaudi_stop_tpc_qmans(struct hl_device *hdev)3401{3402struct gaudi_device *gaudi = hdev->asic_specific;34033404if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))3405return;34063407WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3408WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3409WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3410WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3411WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3412WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3413WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3414WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);3415}34163417static void gaudi_stop_nic_qmans(struct hl_device *hdev)3418{3419struct gaudi_device *gaudi = hdev->asic_specific;34203421/* Stop upper CPs of QMANs */34223423if (gaudi->hw_cap_initialized & HW_CAP_NIC0)3424WREG32(mmNIC0_QM0_GLBL_CFG1,3425NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3426NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3427NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34283429if (gaudi->hw_cap_initialized & HW_CAP_NIC1)3430WREG32(mmNIC0_QM1_GLBL_CFG1,3431NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3432NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3433NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34343435if (gaudi->hw_cap_initialized & HW_CAP_NIC2)3436WREG32(mmNIC1_QM0_GLBL_CFG1,3437NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3438NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3439NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34403441if (gaudi->hw_cap_initialized & HW_CAP_NIC3)3442WREG32(mmNIC1_QM1_GLBL_CFG1,3443NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3444NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3445NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34463447if (gaudi->hw_cap_initialized & HW_CAP_NIC4)3448WREG32(mmNIC2_QM0_GLBL_CFG1,3449NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3450NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3451NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34523453if (gaudi->hw_cap_initialized & HW_CAP_NIC5)3454WREG32(mmNIC2_QM1_GLBL_CFG1,3455NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3456NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3457NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34583459if (gaudi->hw_cap_initialized & HW_CAP_NIC6)3460WREG32(mmNIC3_QM0_GLBL_CFG1,3461NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3462NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3463NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34643465if (gaudi->hw_cap_initialized & HW_CAP_NIC7)3466WREG32(mmNIC3_QM1_GLBL_CFG1,3467NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3468NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3469NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34703471if (gaudi->hw_cap_initialized & HW_CAP_NIC8)3472WREG32(mmNIC4_QM0_GLBL_CFG1,3473NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3474NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3475NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);34763477if (gaudi->hw_cap_initialized & HW_CAP_NIC9)3478WREG32(mmNIC4_QM1_GLBL_CFG1,3479NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |3480NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |3481NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);3482}34833484static void gaudi_pci_dma_stall(struct hl_device *hdev)3485{3486struct gaudi_device *gaudi = hdev->asic_specific;34873488if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))3489return;34903491WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3492WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3493WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3494}34953496static void gaudi_hbm_dma_stall(struct hl_device *hdev)3497{3498struct gaudi_device *gaudi = hdev->asic_specific;34993500if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))3501return;35023503WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3504WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3505WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3506WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3507WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3508}35093510static void gaudi_mme_stall(struct hl_device *hdev)3511{3512struct gaudi_device *gaudi = hdev->asic_specific;35133514if (!(gaudi->hw_cap_initialized & HW_CAP_MME))3515return;35163517/* WA for H3-1800 bug: do ACC and SBAB writes twice */3518WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3519WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3520WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3521WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3522WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3523WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3524WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3525WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3526WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3527WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3528WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3529WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3530WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3531WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3532WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3533WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3534}35353536static void gaudi_tpc_stall(struct hl_device *hdev)3537{3538struct gaudi_device *gaudi = hdev->asic_specific;35393540if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))3541return;35423543WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3544WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3545WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3546WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3547WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3548WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3549WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3550WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3551}35523553static void gaudi_disable_clock_gating(struct hl_device *hdev)3554{3555u32 qman_offset;3556int i;35573558if (hdev->asic_prop.fw_security_enabled)3559return;35603561for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {3562WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);3563WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);35643565qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);3566}35673568WREG32(mmMME0_QM_CGM_CFG, 0);3569WREG32(mmMME0_QM_CGM_CFG1, 0);3570WREG32(mmMME2_QM_CGM_CFG, 0);3571WREG32(mmMME2_QM_CGM_CFG1, 0);35723573for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {3574WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);3575WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);35763577qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);3578}3579}35803581static void gaudi_enable_timestamp(struct hl_device *hdev)3582{3583/* Disable the timestamp counter */3584WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);35853586/* Zero the lower/upper parts of the 64-bit counter */3587WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);3588WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);35893590/* Enable the counter */3591WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);3592}35933594static void gaudi_disable_timestamp(struct hl_device *hdev)3595{3596/* Disable the timestamp counter */3597WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);3598}35993600static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)3601{3602u32 wait_timeout_ms;36033604if (hdev->pldm)3605wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;3606else3607wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;36083609if (fw_reset)3610goto skip_engines;36113612gaudi_stop_nic_qmans(hdev);3613gaudi_stop_mme_qmans(hdev);3614gaudi_stop_tpc_qmans(hdev);3615gaudi_stop_hbm_dma_qmans(hdev);3616gaudi_stop_pci_dma_qmans(hdev);36173618msleep(wait_timeout_ms);36193620gaudi_pci_dma_stall(hdev);3621gaudi_hbm_dma_stall(hdev);3622gaudi_tpc_stall(hdev);3623gaudi_mme_stall(hdev);36243625msleep(wait_timeout_ms);36263627gaudi_disable_nic_qmans(hdev);3628gaudi_disable_mme_qmans(hdev);3629gaudi_disable_tpc_qmans(hdev);3630gaudi_disable_hbm_dma_qmans(hdev);3631gaudi_disable_pci_dma_qmans(hdev);36323633gaudi_disable_timestamp(hdev);36343635skip_engines:3636gaudi_disable_msi(hdev);3637}36383639static int gaudi_mmu_init(struct hl_device *hdev)3640{3641struct asic_fixed_properties *prop = &hdev->asic_prop;3642struct gaudi_device *gaudi = hdev->asic_specific;3643u64 hop0_addr;3644int rc, i;36453646if (gaudi->hw_cap_initialized & HW_CAP_MMU)3647return 0;36483649for (i = 0 ; i < prop->max_asid ; i++) {3650hop0_addr = prop->mmu_pgt_addr +3651(i * prop->dmmu.hop_table_size);36523653rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);3654if (rc) {3655dev_err(hdev->dev,3656"failed to set hop0 addr for asid %d\n", i);3657return rc;3658}3659}36603661/* init MMU cache manage page */3662WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);3663WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);36643665/* mem cache invalidation */3666WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);36673668rc = hl_mmu_invalidate_cache(hdev, true, 0);3669if (rc)3670return rc;36713672WREG32(mmMMU_UP_MMU_ENABLE, 1);3673WREG32(mmMMU_UP_SPI_MASK, 0xF);36743675WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);36763677/*3678* The H/W expects the first PI after init to be 1. After wraparound3679* we'll write 0.3680*/3681gaudi->mmu_cache_inv_pi = 1;36823683gaudi->hw_cap_initialized |= HW_CAP_MMU;36843685return 0;3686}36873688static int gaudi_load_firmware_to_device(struct hl_device *hdev)3689{3690void __iomem *dst;36913692dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;36933694return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);3695}36963697static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)3698{3699void __iomem *dst;37003701dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;37023703return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);3704}37053706static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)3707{3708struct dynamic_fw_load_mgr *dynamic_loader;3709struct cpu_dyn_regs *dyn_regs;37103711dynamic_loader = &hdev->fw_loader.dynamic_loader;37123713/*3714* here we update initial values for few specific dynamic regs (as3715* before reading the first descriptor from FW those value has to be3716* hard-coded) in later stages of the protocol those values will be3717* updated automatically by reading the FW descriptor so data there3718* will always be up-to-date3719*/3720dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;3721dyn_regs->kmd_msg_to_cpu =3722cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);3723dyn_regs->cpu_cmd_status_to_host =3724cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);37253726dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;3727}37283729static void gaudi_init_static_firmware_loader(struct hl_device *hdev)3730{3731struct static_fw_load_mgr *static_loader;37323733static_loader = &hdev->fw_loader.static_loader;37343735static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;3736static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;3737static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;3738static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;3739static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;3740static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;3741static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;3742static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;3743static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;3744static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;3745static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;3746static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));3747static_loader->cpu_reset_wait_msec = hdev->pldm ?3748GAUDI_PLDM_RESET_WAIT_MSEC :3749GAUDI_CPU_RESET_WAIT_MSEC;3750}37513752static void gaudi_init_firmware_preload_params(struct hl_device *hdev)3753{3754struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;37553756pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;3757pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;3758pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;3759pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;3760pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;3761pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;3762}37633764static void gaudi_init_firmware_loader(struct hl_device *hdev)3765{3766struct asic_fixed_properties *prop = &hdev->asic_prop;3767struct fw_load_mgr *fw_loader = &hdev->fw_loader;37683769/* fill common fields */3770fw_loader->fw_comp_loaded = FW_TYPE_NONE;3771fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;3772fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;3773fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;3774fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;3775fw_loader->skip_bmc = !hdev->bmc_enable;3776fw_loader->sram_bar_id = SRAM_BAR_ID;3777fw_loader->dram_bar_id = HBM_BAR_ID;37783779if (prop->dynamic_fw_load)3780gaudi_init_dynamic_firmware_loader(hdev);3781else3782gaudi_init_static_firmware_loader(hdev);3783}37843785static int gaudi_init_cpu(struct hl_device *hdev)3786{3787struct gaudi_device *gaudi = hdev->asic_specific;3788int rc;37893790if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))3791return 0;37923793if (gaudi->hw_cap_initialized & HW_CAP_CPU)3794return 0;37953796/*3797* The device CPU works with 40 bits addresses.3798* This register sets the extension to 50 bits.3799*/3800if (!hdev->asic_prop.fw_security_enabled)3801WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);38023803rc = hl_fw_init_cpu(hdev);38043805if (rc)3806return rc;38073808gaudi->hw_cap_initialized |= HW_CAP_CPU;38093810return 0;3811}38123813static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)3814{3815struct cpu_dyn_regs *dyn_regs =3816&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;3817struct asic_fixed_properties *prop = &hdev->asic_prop;3818struct gaudi_device *gaudi = hdev->asic_specific;3819u32 status, irq_handler_offset;3820struct hl_eq *eq;3821struct hl_hw_queue *cpu_pq =3822&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];3823int err;38243825if (!hdev->cpu_queues_enable)3826return 0;38273828if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)3829return 0;38303831eq = &hdev->event_queue;38323833WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));3834WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));38353836WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));3837WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));38383839WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,3840lower_32_bits(hdev->cpu_accessible_dma_address));3841WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,3842upper_32_bits(hdev->cpu_accessible_dma_address));38433844WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);3845WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);3846WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);38473848/* Used for EQ CI */3849WREG32(mmCPU_IF_EQ_RD_OFFS, 0);38503851WREG32(mmCPU_IF_PF_PQ_PI, 0);38523853WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);38543855irq_handler_offset = prop->gic_interrupts_enable ?3856mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :3857le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);38583859WREG32(irq_handler_offset,3860gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);38613862err = hl_poll_timeout(3863hdev,3864mmCPU_IF_QUEUE_INIT,3865status,3866(status == PQ_INIT_STATUS_READY_FOR_HOST),38671000,3868cpu_timeout);38693870if (err) {3871dev_err(hdev->dev,3872"Failed to communicate with Device CPU (CPU-CP timeout)\n");3873return -EIO;3874}38753876/* update FW application security bits */3877if (prop->fw_cpu_boot_dev_sts0_valid)3878prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);3879if (prop->fw_cpu_boot_dev_sts1_valid)3880prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);38813882gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;3883return 0;3884}38853886static void gaudi_pre_hw_init(struct hl_device *hdev)3887{3888/* Perform read from the device to make sure device is up */3889RREG32(mmHW_STATE);38903891if (!hdev->asic_prop.fw_security_enabled) {3892/* Set the access through PCI bars (Linux driver only) as3893* secured3894*/3895WREG32(mmPCIE_WRAP_LBW_PROT_OVR,3896(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |3897PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));38983899/* Perform read to flush the waiting writes to ensure3900* configuration was set in the device3901*/3902RREG32(mmPCIE_WRAP_LBW_PROT_OVR);3903}39043905/*3906* Let's mark in the H/W that we have reached this point. We check3907* this value in the reset_before_init function to understand whether3908* we need to reset the chip before doing H/W init. This register is3909* cleared by the H/W upon H/W reset3910*/3911WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);3912}39133914static int gaudi_hw_init(struct hl_device *hdev)3915{3916struct gaudi_device *gaudi = hdev->asic_specific;3917int rc;39183919gaudi_pre_hw_init(hdev);39203921/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.3922* So we set it here and if anyone tries to move it later to3923* a different address, there will be an error3924*/3925if (hdev->asic_prop.iatu_done_by_fw)3926gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;39273928/*3929* Before pushing u-boot/linux to device, need to set the hbm bar to3930* base address of dram3931*/3932if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {3933dev_err(hdev->dev,3934"failed to map HBM bar to DRAM base address\n");3935return -EIO;3936}39373938rc = gaudi_init_cpu(hdev);3939if (rc) {3940dev_err(hdev->dev, "failed to initialize CPU\n");3941return rc;3942}39433944/* In case the clock gating was enabled in preboot we need to disable3945* it here before touching the MME/TPC registers.3946*/3947gaudi_disable_clock_gating(hdev);39483949/* SRAM scrambler must be initialized after CPU is running from HBM */3950gaudi_init_scrambler_sram(hdev);39513952/* This is here just in case we are working without CPU */3953gaudi_init_scrambler_hbm(hdev);39543955gaudi_init_golden_registers(hdev);39563957rc = gaudi_mmu_init(hdev);3958if (rc)3959return rc;39603961gaudi_init_security(hdev);39623963gaudi_init_pci_dma_qmans(hdev);39643965gaudi_init_hbm_dma_qmans(hdev);39663967gaudi_init_mme_qmans(hdev);39683969gaudi_init_tpc_qmans(hdev);39703971gaudi_init_nic_qmans(hdev);39723973gaudi_enable_timestamp(hdev);39743975/* MSI must be enabled before CPU queues and NIC are initialized */3976rc = gaudi_enable_msi(hdev);3977if (rc)3978goto disable_queues;39793980/* must be called after MSI was enabled */3981rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);3982if (rc) {3983dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",3984rc);3985goto disable_msi;3986}39873988/* Perform read from the device to flush all configuration */3989RREG32(mmHW_STATE);39903991return 0;39923993disable_msi:3994gaudi_disable_msi(hdev);3995disable_queues:3996gaudi_disable_mme_qmans(hdev);3997gaudi_disable_pci_dma_qmans(hdev);39983999return rc;4000}40014002static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)4003{4004struct cpu_dyn_regs *dyn_regs =4005&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;4006u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;4007struct gaudi_device *gaudi = hdev->asic_specific;4008bool driver_performs_reset;40094010if (!hard_reset) {4011dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");4012return 0;4013}40144015if (hdev->pldm) {4016reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;4017cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;4018} else {4019reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;4020cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;4021}40224023if (fw_reset) {4024dev_dbg(hdev->dev,4025"Firmware performs HARD reset, going to wait %dms\n",4026reset_timeout_ms);40274028goto skip_reset;4029}40304031driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&4032!hdev->asic_prop.hard_reset_done_by_fw);40334034/* Set device to handle FLR by H/W as we will put the device CPU to4035* halt mode4036*/4037if (driver_performs_reset)4038WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |4039PCIE_AUX_FLR_CTRL_INT_MASK_MASK));40404041/* If linux is loaded in the device CPU we need to communicate with it4042* via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU4043* registers in case of old F/Ws4044*/4045if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {4046irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?4047mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :4048le32_to_cpu(dyn_regs->gic_host_halt_irq);40494050WREG32(irq_handler_offset,4051gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);40524053/* This is a hail-mary attempt to revive the card in the small chance that the4054* f/w has experienced a watchdog event, which caused it to return back to preboot.4055* In that case, triggering reset through GIC won't help. We need to trigger the4056* reset as if Linux wasn't loaded.4057*4058* We do it only if the reset cause was HB, because that would be the indication4059* of such an event.4060*4061* In case watchdog hasn't expired but we still got HB, then this won't do any4062* damage.4063*/4064if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {4065if (hdev->asic_prop.hard_reset_done_by_fw)4066hl_fw_ask_hard_reset_without_linux(hdev);4067else4068hl_fw_ask_halt_machine_without_linux(hdev);4069}4070} else {4071if (hdev->asic_prop.hard_reset_done_by_fw)4072hl_fw_ask_hard_reset_without_linux(hdev);4073else4074hl_fw_ask_halt_machine_without_linux(hdev);4075}40764077if (driver_performs_reset) {40784079/* Configure the reset registers. Must be done as early as4080* possible in case we fail during H/W initialization4081*/4082WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,4083(CFG_RST_H_DMA_MASK |4084CFG_RST_H_MME_MASK |4085CFG_RST_H_SM_MASK |4086CFG_RST_H_TPC_7_MASK));40874088WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);40894090WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,4091(CFG_RST_H_HBM_MASK |4092CFG_RST_H_TPC_7_MASK |4093CFG_RST_H_NIC_MASK |4094CFG_RST_H_SM_MASK |4095CFG_RST_H_DMA_MASK |4096CFG_RST_H_MME_MASK |4097CFG_RST_H_CPU_MASK |4098CFG_RST_H_MMU_MASK));40994100WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,4101(CFG_RST_L_IF_MASK |4102CFG_RST_L_PSOC_MASK |4103CFG_RST_L_TPC_MASK));41044105msleep(cpu_timeout_ms);41064107/* Tell ASIC not to re-initialize PCIe */4108WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);41094110/* Restart BTL/BLR upon hard-reset */4111WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);41124113WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,41141 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);41154116dev_dbg(hdev->dev,4117"Issued HARD reset command, going to wait %dms\n",4118reset_timeout_ms);4119} else {4120dev_dbg(hdev->dev,4121"Firmware performs HARD reset, going to wait %dms\n",4122reset_timeout_ms);4123}41244125skip_reset:4126/*4127* After hard reset, we can't poll the BTM_FSM register because the PSOC4128* itself is in reset. Need to wait until the reset is deasserted4129*/4130msleep(reset_timeout_ms);41314132status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);4133if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {4134dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);4135return -ETIMEDOUT;4136}41374138if (gaudi) {4139gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |4140HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |4141HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |4142HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |4143HW_CAP_HBM_SCRAMBLER);41444145memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));41464147hdev->device_cpu_is_halted = false;4148}4149return 0;4150}41514152static int gaudi_suspend(struct hl_device *hdev)4153{4154return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);4155}41564157static int gaudi_resume(struct hl_device *hdev)4158{4159return gaudi_init_iatu(hdev);4160}41614162static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,4163void *cpu_addr, dma_addr_t dma_addr, size_t size)4164{4165int rc;41664167vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |4168VM_DONTCOPY | VM_NORESERVE);41694170#ifdef _HAS_DMA_MMAP_COHERENT4171/*4172* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP4173* so vm_insert_page() can handle it safely. Without this, the kernel4174* may BUG_ON due to VM_PFNMAP.4175*/4176if (is_vmalloc_addr(cpu_addr))4177vm_flags_set(vma, VM_MIXEDMAP);41784179rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,4180(dma_addr - HOST_PHYS_BASE), size);4181if (rc)4182dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);4183#else41844185rc = remap_pfn_range(vma, vma->vm_start,4186virt_to_phys(cpu_addr) >> PAGE_SHIFT,4187size, vma->vm_page_prot);4188if (rc)4189dev_err(hdev->dev, "remap_pfn_range error %d", rc);41904191#endif419241934194return rc;4195}41964197static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)4198{4199struct cpu_dyn_regs *dyn_regs =4200&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;4201u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;4202struct gaudi_device *gaudi = hdev->asic_specific;4203bool invalid_queue = false;4204int dma_id;42054206switch (hw_queue_id) {4207case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:4208dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];4209dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4210q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;4211db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4212break;42134214case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:4215dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];4216dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4217q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;4218db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4219break;42204221case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:4222dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];4223dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4224q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4225db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4226break;42274228case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:4229dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];4230dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4231q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4232db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4233break;42344235case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:4236dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];4237dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4238q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4239db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4240break;42414242case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:4243dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];4244dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4245q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4246db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4247break;42484249case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:4250dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];4251dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4252q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4253db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4254break;42554256case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:4257dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];4258dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4259q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4260db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4261break;42624263case GAUDI_QUEUE_ID_CPU_PQ:4264if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)4265db_reg_offset = mmCPU_IF_PF_PQ_PI;4266else4267invalid_queue = true;4268break;42694270case GAUDI_QUEUE_ID_MME_0_0:4271db_reg_offset = mmMME2_QM_PQ_PI_0;4272break;42734274case GAUDI_QUEUE_ID_MME_0_1:4275db_reg_offset = mmMME2_QM_PQ_PI_1;4276break;42774278case GAUDI_QUEUE_ID_MME_0_2:4279db_reg_offset = mmMME2_QM_PQ_PI_2;4280break;42814282case GAUDI_QUEUE_ID_MME_0_3:4283db_reg_offset = mmMME2_QM_PQ_PI_3;4284break;42854286case GAUDI_QUEUE_ID_MME_1_0:4287db_reg_offset = mmMME0_QM_PQ_PI_0;4288break;42894290case GAUDI_QUEUE_ID_MME_1_1:4291db_reg_offset = mmMME0_QM_PQ_PI_1;4292break;42934294case GAUDI_QUEUE_ID_MME_1_2:4295db_reg_offset = mmMME0_QM_PQ_PI_2;4296break;42974298case GAUDI_QUEUE_ID_MME_1_3:4299db_reg_offset = mmMME0_QM_PQ_PI_3;4300break;43014302case GAUDI_QUEUE_ID_TPC_0_0:4303db_reg_offset = mmTPC0_QM_PQ_PI_0;4304break;43054306case GAUDI_QUEUE_ID_TPC_0_1:4307db_reg_offset = mmTPC0_QM_PQ_PI_1;4308break;43094310case GAUDI_QUEUE_ID_TPC_0_2:4311db_reg_offset = mmTPC0_QM_PQ_PI_2;4312break;43134314case GAUDI_QUEUE_ID_TPC_0_3:4315db_reg_offset = mmTPC0_QM_PQ_PI_3;4316break;43174318case GAUDI_QUEUE_ID_TPC_1_0:4319db_reg_offset = mmTPC1_QM_PQ_PI_0;4320break;43214322case GAUDI_QUEUE_ID_TPC_1_1:4323db_reg_offset = mmTPC1_QM_PQ_PI_1;4324break;43254326case GAUDI_QUEUE_ID_TPC_1_2:4327db_reg_offset = mmTPC1_QM_PQ_PI_2;4328break;43294330case GAUDI_QUEUE_ID_TPC_1_3:4331db_reg_offset = mmTPC1_QM_PQ_PI_3;4332break;43334334case GAUDI_QUEUE_ID_TPC_2_0:4335db_reg_offset = mmTPC2_QM_PQ_PI_0;4336break;43374338case GAUDI_QUEUE_ID_TPC_2_1:4339db_reg_offset = mmTPC2_QM_PQ_PI_1;4340break;43414342case GAUDI_QUEUE_ID_TPC_2_2:4343db_reg_offset = mmTPC2_QM_PQ_PI_2;4344break;43454346case GAUDI_QUEUE_ID_TPC_2_3:4347db_reg_offset = mmTPC2_QM_PQ_PI_3;4348break;43494350case GAUDI_QUEUE_ID_TPC_3_0:4351db_reg_offset = mmTPC3_QM_PQ_PI_0;4352break;43534354case GAUDI_QUEUE_ID_TPC_3_1:4355db_reg_offset = mmTPC3_QM_PQ_PI_1;4356break;43574358case GAUDI_QUEUE_ID_TPC_3_2:4359db_reg_offset = mmTPC3_QM_PQ_PI_2;4360break;43614362case GAUDI_QUEUE_ID_TPC_3_3:4363db_reg_offset = mmTPC3_QM_PQ_PI_3;4364break;43654366case GAUDI_QUEUE_ID_TPC_4_0:4367db_reg_offset = mmTPC4_QM_PQ_PI_0;4368break;43694370case GAUDI_QUEUE_ID_TPC_4_1:4371db_reg_offset = mmTPC4_QM_PQ_PI_1;4372break;43734374case GAUDI_QUEUE_ID_TPC_4_2:4375db_reg_offset = mmTPC4_QM_PQ_PI_2;4376break;43774378case GAUDI_QUEUE_ID_TPC_4_3:4379db_reg_offset = mmTPC4_QM_PQ_PI_3;4380break;43814382case GAUDI_QUEUE_ID_TPC_5_0:4383db_reg_offset = mmTPC5_QM_PQ_PI_0;4384break;43854386case GAUDI_QUEUE_ID_TPC_5_1:4387db_reg_offset = mmTPC5_QM_PQ_PI_1;4388break;43894390case GAUDI_QUEUE_ID_TPC_5_2:4391db_reg_offset = mmTPC5_QM_PQ_PI_2;4392break;43934394case GAUDI_QUEUE_ID_TPC_5_3:4395db_reg_offset = mmTPC5_QM_PQ_PI_3;4396break;43974398case GAUDI_QUEUE_ID_TPC_6_0:4399db_reg_offset = mmTPC6_QM_PQ_PI_0;4400break;44014402case GAUDI_QUEUE_ID_TPC_6_1:4403db_reg_offset = mmTPC6_QM_PQ_PI_1;4404break;44054406case GAUDI_QUEUE_ID_TPC_6_2:4407db_reg_offset = mmTPC6_QM_PQ_PI_2;4408break;44094410case GAUDI_QUEUE_ID_TPC_6_3:4411db_reg_offset = mmTPC6_QM_PQ_PI_3;4412break;44134414case GAUDI_QUEUE_ID_TPC_7_0:4415db_reg_offset = mmTPC7_QM_PQ_PI_0;4416break;44174418case GAUDI_QUEUE_ID_TPC_7_1:4419db_reg_offset = mmTPC7_QM_PQ_PI_1;4420break;44214422case GAUDI_QUEUE_ID_TPC_7_2:4423db_reg_offset = mmTPC7_QM_PQ_PI_2;4424break;44254426case GAUDI_QUEUE_ID_TPC_7_3:4427db_reg_offset = mmTPC7_QM_PQ_PI_3;4428break;44294430case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:4431if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))4432invalid_queue = true;44334434q_off = ((hw_queue_id - 1) & 0x3) * 4;4435db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;4436break;44374438case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:4439if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))4440invalid_queue = true;44414442q_off = ((hw_queue_id - 1) & 0x3) * 4;4443db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;4444break;44454446case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:4447if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))4448invalid_queue = true;44494450q_off = ((hw_queue_id - 1) & 0x3) * 4;4451db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;4452break;44534454case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:4455if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))4456invalid_queue = true;44574458q_off = ((hw_queue_id - 1) & 0x3) * 4;4459db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;4460break;44614462case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:4463if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))4464invalid_queue = true;44654466q_off = ((hw_queue_id - 1) & 0x3) * 4;4467db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;4468break;44694470case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:4471if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))4472invalid_queue = true;44734474q_off = ((hw_queue_id - 1) & 0x3) * 4;4475db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;4476break;44774478case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:4479if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))4480invalid_queue = true;44814482q_off = ((hw_queue_id - 1) & 0x3) * 4;4483db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;4484break;44854486case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:4487if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))4488invalid_queue = true;44894490q_off = ((hw_queue_id - 1) & 0x3) * 4;4491db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;4492break;44934494case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:4495if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))4496invalid_queue = true;44974498q_off = ((hw_queue_id - 1) & 0x3) * 4;4499db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;4500break;45014502case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:4503if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))4504invalid_queue = true;45054506q_off = ((hw_queue_id - 1) & 0x3) * 4;4507db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;4508break;45094510default:4511invalid_queue = true;4512}45134514if (invalid_queue) {4515/* Should never get here */4516dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",4517hw_queue_id);4518return;4519}45204521db_value = pi;45224523/* ring the doorbell */4524WREG32(db_reg_offset, db_value);45254526if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {4527/* make sure device CPU will read latest data from host */4528mb();45294530irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?4531mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :4532le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);45334534WREG32(irq_handler_offset,4535gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);4536}4537}45384539static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,4540struct hl_bd *bd)4541{4542__le64 *pbd = (__le64 *) bd;45434544/* The QMANs are on the host memory so a simple copy suffice */4545pqe[0] = pbd[0];4546pqe[1] = pbd[1];4547}45484549static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,4550dma_addr_t *dma_handle, gfp_t flags)4551{4552void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,4553dma_handle, flags);45544555/* Shift to the device's base physical address of host memory */4556if (kernel_addr)4557*dma_handle += HOST_PHYS_BASE;45584559return kernel_addr;4560}45614562static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,4563void *cpu_addr, dma_addr_t dma_handle)4564{4565/* Cancel the device's base physical address of host memory */4566dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;45674568dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);4569}45704571static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)4572{4573struct asic_fixed_properties *prop = &hdev->asic_prop;4574u64 cur_addr = prop->dram_user_base_address;4575u32 chunk_size, busy;4576int rc, dma_id;45774578while (cur_addr < prop->dram_end_address) {4579for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {4580u32 dma_offset = dma_id * DMA_CORE_OFFSET;45814582chunk_size =4583min((u64)SZ_2G, prop->dram_end_address - cur_addr);45844585dev_dbg(hdev->dev,4586"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",4587cur_addr, cur_addr + chunk_size);45884589WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,4590lower_32_bits(val));4591WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,4592upper_32_bits(val));4593WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,4594lower_32_bits(cur_addr));4595WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,4596upper_32_bits(cur_addr));4597WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,4598chunk_size);4599WREG32(mmDMA0_CORE_COMMIT + dma_offset,4600((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |4601(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));46024603cur_addr += chunk_size;46044605if (cur_addr == prop->dram_end_address)4606break;4607}46084609for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {4610u32 dma_offset = dma_id * DMA_CORE_OFFSET;46114612rc = hl_poll_timeout(4613hdev,4614mmDMA0_CORE_STS0 + dma_offset,4615busy,4616((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),46171000,4618HBM_SCRUBBING_TIMEOUT_US);46194620if (rc) {4621dev_err(hdev->dev,4622"DMA Timeout during HBM scrubbing of DMA #%d\n",4623dma_id);4624return -EIO;4625}4626}4627}46284629return 0;4630}46314632static int gaudi_scrub_device_mem(struct hl_device *hdev)4633{4634struct asic_fixed_properties *prop = &hdev->asic_prop;4635u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;4636u64 addr, size, val = hdev->memory_scrub_val;4637ktime_t timeout;4638int rc = 0;46394640if (!hdev->memory_scrub)4641return 0;46424643timeout = ktime_add_us(ktime_get(), wait_to_idle_time);4644while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {4645if (ktime_compare(ktime_get(), timeout) > 0) {4646dev_err(hdev->dev, "waiting for idle timeout\n");4647return -ETIMEDOUT;4648}4649usleep_range((1000 >> 2) + 1, 1000);4650}46514652/* Scrub SRAM */4653addr = prop->sram_user_base_address;4654size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;46554656dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",4657addr, addr + size, val);4658rc = gaudi_memset_device_memory(hdev, addr, size, val);4659if (rc) {4660dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);4661return rc;4662}46634664/* Scrub HBM using all DMA channels in parallel */4665rc = gaudi_scrub_device_dram(hdev, val);4666if (rc) {4667dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);4668return rc;4669}46704671return 0;4672}46734674static void *gaudi_get_int_queue_base(struct hl_device *hdev,4675u32 queue_id, dma_addr_t *dma_handle,4676u16 *queue_len)4677{4678struct gaudi_device *gaudi = hdev->asic_specific;4679struct gaudi_internal_qman_info *q;46804681if (queue_id >= GAUDI_QUEUE_ID_SIZE ||4682gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {4683dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);4684return NULL;4685}46864687q = &gaudi->internal_qmans[queue_id];4688*dma_handle = q->pq_dma_addr;4689*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;46904691return q->pq_kernel_addr;4692}46934694static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,4695u16 len, u32 timeout, u64 *result)4696{4697struct gaudi_device *gaudi = hdev->asic_specific;46984699if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {4700if (result)4701*result = 0;4702return 0;4703}47044705if (!timeout)4706timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;47074708return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,4709timeout, result);4710}47114712static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)4713{4714struct packet_msg_prot *fence_pkt;4715dma_addr_t pkt_dma_addr;4716u32 fence_val, tmp, timeout_usec;4717dma_addr_t fence_dma_addr;4718u32 *fence_ptr;4719int rc;47204721if (hdev->pldm)4722timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;4723else4724timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;47254726fence_val = GAUDI_QMAN0_FENCE_VAL;47274728fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);4729if (!fence_ptr) {4730dev_err(hdev->dev,4731"Failed to allocate memory for H/W queue %d testing\n",4732hw_queue_id);4733return -ENOMEM;4734}47354736*fence_ptr = 0;47374738fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,4739&pkt_dma_addr);4740if (!fence_pkt) {4741dev_err(hdev->dev,4742"Failed to allocate packet for H/W queue %d testing\n",4743hw_queue_id);4744rc = -ENOMEM;4745goto free_fence_ptr;4746}47474748tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);4749tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);4750tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);47514752fence_pkt->ctl = cpu_to_le32(tmp);4753fence_pkt->value = cpu_to_le32(fence_val);4754fence_pkt->addr = cpu_to_le64(fence_dma_addr);47554756rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,4757sizeof(struct packet_msg_prot),4758pkt_dma_addr);4759if (rc) {4760dev_err(hdev->dev,4761"Failed to send fence packet to H/W queue %d\n",4762hw_queue_id);4763goto free_pkt;4764}47654766rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),47671000, timeout_usec, true);47684769hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);47704771if (rc == -ETIMEDOUT) {4772dev_err(hdev->dev,4773"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",4774hw_queue_id, (unsigned long long) fence_dma_addr, tmp);4775rc = -EIO;4776}47774778free_pkt:4779hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);4780free_fence_ptr:4781hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);4782return rc;4783}47844785static int gaudi_test_cpu_queue(struct hl_device *hdev)4786{4787struct gaudi_device *gaudi = hdev->asic_specific;47884789/*4790* check capability here as send_cpu_message() won't update the result4791* value if no capability4792*/4793if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))4794return 0;47954796return hl_fw_test_cpu_queue(hdev);4797}47984799static int gaudi_test_queues(struct hl_device *hdev)4800{4801int i, rc, ret_val = 0;48024803for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {4804if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {4805rc = gaudi_test_queue(hdev, i);4806if (rc)4807ret_val = -EINVAL;4808}4809}48104811rc = gaudi_test_cpu_queue(hdev);4812if (rc)4813ret_val = -EINVAL;48144815return ret_val;4816}48174818static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,4819gfp_t mem_flags, dma_addr_t *dma_handle)4820{4821void *kernel_addr;48224823if (size > GAUDI_DMA_POOL_BLK_SIZE)4824return NULL;48254826kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);48274828/* Shift to the device's base physical address of host memory */4829if (kernel_addr)4830*dma_handle += HOST_PHYS_BASE;48314832return kernel_addr;4833}48344835static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,4836dma_addr_t dma_addr)4837{4838/* Cancel the device's base physical address of host memory */4839dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;48404841dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);4842}48434844static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,4845size_t size, dma_addr_t *dma_handle)4846{4847return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);4848}48494850static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,4851size_t size, void *vaddr)4852{4853hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);4854}48554856static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)4857{4858struct scatterlist *sg, *sg_next_iter;4859u32 count, dma_desc_cnt;4860u64 len, len_next;4861dma_addr_t addr, addr_next;48624863dma_desc_cnt = 0;48644865for_each_sgtable_dma_sg(sgt, sg, count) {4866len = sg_dma_len(sg);4867addr = sg_dma_address(sg);48684869if (len == 0)4870break;48714872while ((count + 1) < sgt->nents) {4873sg_next_iter = sg_next(sg);4874len_next = sg_dma_len(sg_next_iter);4875addr_next = sg_dma_address(sg_next_iter);48764877if (len_next == 0)4878break;48794880if ((addr + len == addr_next) &&4881(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {4882len += len_next;4883count++;4884sg = sg_next_iter;4885} else {4886break;4887}4888}48894890dma_desc_cnt++;4891}48924893return dma_desc_cnt * sizeof(struct packet_lin_dma);4894}48954896static int gaudi_pin_memory_before_cs(struct hl_device *hdev,4897struct hl_cs_parser *parser,4898struct packet_lin_dma *user_dma_pkt,4899u64 addr, enum dma_data_direction dir)4900{4901struct hl_userptr *userptr;4902int rc;49034904if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),4905parser->job_userptr_list, &userptr))4906goto already_pinned;49074908userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);4909if (!userptr)4910return -ENOMEM;49114912rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),4913userptr);4914if (rc)4915goto free_userptr;49164917list_add_tail(&userptr->job_node, parser->job_userptr_list);49184919rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);4920if (rc) {4921dev_err(hdev->dev, "failed to map sgt with DMA region\n");4922goto unpin_memory;4923}49244925userptr->dma_mapped = true;4926userptr->dir = dir;49274928already_pinned:4929parser->patched_cb_size +=4930gaudi_get_dma_desc_list_size(hdev, userptr->sgt);49314932return 0;49334934unpin_memory:4935list_del(&userptr->job_node);4936hl_unpin_host_memory(hdev, userptr);4937free_userptr:4938kfree(userptr);4939return rc;4940}49414942static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,4943struct hl_cs_parser *parser,4944struct packet_lin_dma *user_dma_pkt,4945bool src_in_host)4946{4947enum dma_data_direction dir;4948bool skip_host_mem_pin = false, user_memset;4949u64 addr;4950int rc = 0;49514952user_memset = (le32_to_cpu(user_dma_pkt->ctl) &4953GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>4954GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;49554956if (src_in_host) {4957if (user_memset)4958skip_host_mem_pin = true;49594960dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");4961dir = DMA_TO_DEVICE;4962addr = le64_to_cpu(user_dma_pkt->src_addr);4963} else {4964dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");4965dir = DMA_FROM_DEVICE;4966addr = (le64_to_cpu(user_dma_pkt->dst_addr) &4967GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>4968GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;4969}49704971if (skip_host_mem_pin)4972parser->patched_cb_size += sizeof(*user_dma_pkt);4973else4974rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,4975addr, dir);49764977return rc;4978}49794980static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,4981struct hl_cs_parser *parser,4982struct packet_lin_dma *user_dma_pkt)4983{4984bool src_in_host = false;4985u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &4986GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>4987GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;49884989dev_dbg(hdev->dev, "DMA packet details:\n");4990dev_dbg(hdev->dev, "source == 0x%llx\n",4991le64_to_cpu(user_dma_pkt->src_addr));4992dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);4993dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));49944995/*4996* Special handling for DMA with size 0. Bypass all validations4997* because no transactions will be done except for WR_COMP, which4998* is not a security issue4999*/5000if (!le32_to_cpu(user_dma_pkt->tsize)) {5001parser->patched_cb_size += sizeof(*user_dma_pkt);5002return 0;5003}50045005if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)5006src_in_host = true;50075008return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,5009src_in_host);5010}50115012static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,5013struct hl_cs_parser *parser,5014struct packet_load_and_exe *user_pkt)5015{5016u32 cfg;50175018cfg = le32_to_cpu(user_pkt->cfg);50195020if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {5021dev_err(hdev->dev,5022"User not allowed to use Load and Execute\n");5023return -EPERM;5024}50255026parser->patched_cb_size += sizeof(struct packet_load_and_exe);50275028return 0;5029}50305031static int gaudi_validate_cb(struct hl_device *hdev,5032struct hl_cs_parser *parser, bool is_mmu)5033{5034u32 cb_parsed_length = 0;5035int rc = 0;50365037parser->patched_cb_size = 0;50385039/* cb_user_size is more than 0 so loop will always be executed */5040while (cb_parsed_length < parser->user_cb_size) {5041enum packet_id pkt_id;5042u16 pkt_size;5043struct gaudi_packet *user_pkt;50445045user_pkt = parser->user_cb->kernel_address + cb_parsed_length;50465047pkt_id = (enum packet_id) (5048(le64_to_cpu(user_pkt->header) &5049PACKET_HEADER_PACKET_ID_MASK) >>5050PACKET_HEADER_PACKET_ID_SHIFT);50515052if (!validate_packet_id(pkt_id)) {5053dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);5054rc = -EINVAL;5055break;5056}50575058pkt_size = gaudi_packet_sizes[pkt_id];5059cb_parsed_length += pkt_size;5060if (cb_parsed_length > parser->user_cb_size) {5061dev_err(hdev->dev,5062"packet 0x%x is out of CB boundary\n", pkt_id);5063rc = -EINVAL;5064break;5065}50665067switch (pkt_id) {5068case PACKET_MSG_PROT:5069dev_err(hdev->dev,5070"User not allowed to use MSG_PROT\n");5071rc = -EPERM;5072break;50735074case PACKET_CP_DMA:5075dev_err(hdev->dev, "User not allowed to use CP_DMA\n");5076rc = -EPERM;5077break;50785079case PACKET_STOP:5080dev_err(hdev->dev, "User not allowed to use STOP\n");5081rc = -EPERM;5082break;50835084case PACKET_WREG_BULK:5085dev_err(hdev->dev,5086"User not allowed to use WREG_BULK\n");5087rc = -EPERM;5088break;50895090case PACKET_LOAD_AND_EXE:5091rc = gaudi_validate_load_and_exe_pkt(hdev, parser,5092(struct packet_load_and_exe *) user_pkt);5093break;50945095case PACKET_LIN_DMA:5096parser->contains_dma_pkt = true;5097if (is_mmu)5098parser->patched_cb_size += pkt_size;5099else5100rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,5101(struct packet_lin_dma *) user_pkt);5102break;51035104case PACKET_WREG_32:5105case PACKET_MSG_LONG:5106case PACKET_MSG_SHORT:5107case PACKET_REPEAT:5108case PACKET_FENCE:5109case PACKET_NOP:5110case PACKET_ARB_POINT:5111parser->patched_cb_size += pkt_size;5112break;51135114default:5115dev_err(hdev->dev, "Invalid packet header 0x%x\n",5116pkt_id);5117rc = -EINVAL;5118break;5119}51205121if (rc)5122break;5123}51245125/*5126* The new CB should have space at the end for two MSG_PROT packets:5127* 1. Optional NOP padding for cacheline alignment5128* 2. A packet that will act as a completion packet5129* 3. A packet that will generate MSI interrupt5130*/5131if (parser->completion)5132parser->patched_cb_size += gaudi_get_patched_cb_extra_size(5133parser->patched_cb_size);51345135return rc;5136}51375138static int gaudi_patch_dma_packet(struct hl_device *hdev,5139struct hl_cs_parser *parser,5140struct packet_lin_dma *user_dma_pkt,5141struct packet_lin_dma *new_dma_pkt,5142u32 *new_dma_pkt_size)5143{5144struct hl_userptr *userptr;5145struct scatterlist *sg, *sg_next_iter;5146u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;5147u64 len, len_next;5148dma_addr_t dma_addr, dma_addr_next;5149u64 device_memory_addr, addr;5150enum dma_data_direction dir;5151struct sg_table *sgt;5152bool src_in_host = false;5153bool skip_host_mem_pin = false;5154bool user_memset;51555156ctl = le32_to_cpu(user_dma_pkt->ctl);51575158if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)5159src_in_host = true;51605161user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>5162GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;51635164if (src_in_host) {5165addr = le64_to_cpu(user_dma_pkt->src_addr);5166device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);5167dir = DMA_TO_DEVICE;5168if (user_memset)5169skip_host_mem_pin = true;5170} else {5171addr = le64_to_cpu(user_dma_pkt->dst_addr);5172device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);5173dir = DMA_FROM_DEVICE;5174}51755176if ((!skip_host_mem_pin) &&5177(!hl_userptr_is_pinned(hdev, addr,5178le32_to_cpu(user_dma_pkt->tsize),5179parser->job_userptr_list, &userptr))) {5180dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",5181addr, user_dma_pkt->tsize);5182return -EFAULT;5183}51845185if ((user_memset) && (dir == DMA_TO_DEVICE)) {5186memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));5187*new_dma_pkt_size = sizeof(*user_dma_pkt);5188return 0;5189}51905191user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;51925193sgt = userptr->sgt;5194dma_desc_cnt = 0;51955196for_each_sgtable_dma_sg(sgt, sg, count) {5197len = sg_dma_len(sg);5198dma_addr = sg_dma_address(sg);51995200if (len == 0)5201break;52025203while ((count + 1) < sgt->nents) {5204sg_next_iter = sg_next(sg);5205len_next = sg_dma_len(sg_next_iter);5206dma_addr_next = sg_dma_address(sg_next_iter);52075208if (len_next == 0)5209break;52105211if ((dma_addr + len == dma_addr_next) &&5212(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {5213len += len_next;5214count++;5215sg = sg_next_iter;5216} else {5217break;5218}5219}52205221ctl = le32_to_cpu(user_dma_pkt->ctl);5222if (likely(dma_desc_cnt))5223ctl &= ~GAUDI_PKT_CTL_EB_MASK;5224ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;5225new_dma_pkt->ctl = cpu_to_le32(ctl);5226new_dma_pkt->tsize = cpu_to_le32(len);52275228if (dir == DMA_TO_DEVICE) {5229new_dma_pkt->src_addr = cpu_to_le64(dma_addr);5230new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);5231} else {5232new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);5233new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);5234}52355236if (!user_memset)5237device_memory_addr += len;5238dma_desc_cnt++;5239new_dma_pkt++;5240}52415242if (!dma_desc_cnt) {5243dev_err(hdev->dev,5244"Error of 0 SG entries when patching DMA packet\n");5245return -EFAULT;5246}52475248/* Fix the last dma packet - wrcomp must be as user set it */5249new_dma_pkt--;5250new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);52515252*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);52535254return 0;5255}52565257static int gaudi_patch_cb(struct hl_device *hdev,5258struct hl_cs_parser *parser)5259{5260u32 cb_parsed_length = 0;5261u32 cb_patched_cur_length = 0;5262int rc = 0;52635264/* cb_user_size is more than 0 so loop will always be executed */5265while (cb_parsed_length < parser->user_cb_size) {5266enum packet_id pkt_id;5267u16 pkt_size;5268u32 new_pkt_size = 0;5269struct gaudi_packet *user_pkt, *kernel_pkt;52705271user_pkt = parser->user_cb->kernel_address + cb_parsed_length;5272kernel_pkt = parser->patched_cb->kernel_address +5273cb_patched_cur_length;52745275pkt_id = (enum packet_id) (5276(le64_to_cpu(user_pkt->header) &5277PACKET_HEADER_PACKET_ID_MASK) >>5278PACKET_HEADER_PACKET_ID_SHIFT);52795280if (!validate_packet_id(pkt_id)) {5281dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);5282rc = -EINVAL;5283break;5284}52855286pkt_size = gaudi_packet_sizes[pkt_id];5287cb_parsed_length += pkt_size;5288if (cb_parsed_length > parser->user_cb_size) {5289dev_err(hdev->dev,5290"packet 0x%x is out of CB boundary\n", pkt_id);5291rc = -EINVAL;5292break;5293}52945295switch (pkt_id) {5296case PACKET_LIN_DMA:5297rc = gaudi_patch_dma_packet(hdev, parser,5298(struct packet_lin_dma *) user_pkt,5299(struct packet_lin_dma *) kernel_pkt,5300&new_pkt_size);5301cb_patched_cur_length += new_pkt_size;5302break;53035304case PACKET_MSG_PROT:5305dev_err(hdev->dev,5306"User not allowed to use MSG_PROT\n");5307rc = -EPERM;5308break;53095310case PACKET_CP_DMA:5311dev_err(hdev->dev, "User not allowed to use CP_DMA\n");5312rc = -EPERM;5313break;53145315case PACKET_STOP:5316dev_err(hdev->dev, "User not allowed to use STOP\n");5317rc = -EPERM;5318break;53195320case PACKET_WREG_32:5321case PACKET_WREG_BULK:5322case PACKET_MSG_LONG:5323case PACKET_MSG_SHORT:5324case PACKET_REPEAT:5325case PACKET_FENCE:5326case PACKET_NOP:5327case PACKET_ARB_POINT:5328case PACKET_LOAD_AND_EXE:5329memcpy(kernel_pkt, user_pkt, pkt_size);5330cb_patched_cur_length += pkt_size;5331break;53325333default:5334dev_err(hdev->dev, "Invalid packet header 0x%x\n",5335pkt_id);5336rc = -EINVAL;5337break;5338}53395340if (rc)5341break;5342}53435344return rc;5345}53465347static int gaudi_parse_cb_mmu(struct hl_device *hdev,5348struct hl_cs_parser *parser)5349{5350u64 handle;5351u32 patched_cb_size;5352struct hl_cb *user_cb;5353int rc;53545355/*5356* The new CB should have space at the end for two MSG_PROT packets:5357* 1. Optional NOP padding for cacheline alignment5358* 2. A packet that will act as a completion packet5359* 3. A packet that will generate MSI interrupt5360*/5361if (parser->completion)5362parser->patched_cb_size = parser->user_cb_size +5363gaudi_get_patched_cb_extra_size(parser->user_cb_size);5364else5365parser->patched_cb_size = parser->user_cb_size;53665367rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,5368parser->patched_cb_size, false, false,5369&handle);53705371if (rc) {5372dev_err(hdev->dev,5373"Failed to allocate patched CB for DMA CS %d\n",5374rc);5375return rc;5376}53775378parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);5379/* hl_cb_get should never fail */5380if (!parser->patched_cb) {5381dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);5382rc = -EFAULT;5383goto out;5384}53855386/*5387* We are protected from overflow because the check5388* "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()5389* in the common code. That check is done only if is_kernel_allocated_cb is true.5390*5391* There is no option to reach here without going through that check because:5392* 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to5393* an external queue.5394* 2. For Gaudi, we only parse CBs that were submitted to the external queues.5395*/5396memcpy(parser->patched_cb->kernel_address,5397parser->user_cb->kernel_address,5398parser->user_cb_size);53995400patched_cb_size = parser->patched_cb_size;54015402/* Validate patched CB instead of user CB */5403user_cb = parser->user_cb;5404parser->user_cb = parser->patched_cb;5405rc = gaudi_validate_cb(hdev, parser, true);5406parser->user_cb = user_cb;54075408if (rc) {5409hl_cb_put(parser->patched_cb);5410goto out;5411}54125413if (patched_cb_size != parser->patched_cb_size) {5414dev_err(hdev->dev, "user CB size mismatch\n");5415hl_cb_put(parser->patched_cb);5416rc = -EINVAL;5417goto out;5418}54195420out:5421/*5422* Always call cb destroy here because we still have 1 reference5423* to it by calling cb_get earlier. After the job will be completed,5424* cb_put will release it, but here we want to remove it from the5425* idr5426*/5427hl_cb_destroy(&hdev->kernel_mem_mgr, handle);54285429return rc;5430}54315432static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,5433struct hl_cs_parser *parser)5434{5435u64 handle;5436int rc;54375438rc = gaudi_validate_cb(hdev, parser, false);54395440if (rc)5441goto free_userptr;54425443rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,5444parser->patched_cb_size, false, false,5445&handle);5446if (rc) {5447dev_err(hdev->dev,5448"Failed to allocate patched CB for DMA CS %d\n", rc);5449goto free_userptr;5450}54515452parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);5453/* hl_cb_get should never fail here */5454if (!parser->patched_cb) {5455dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);5456rc = -EFAULT;5457goto out;5458}54595460rc = gaudi_patch_cb(hdev, parser);54615462if (rc)5463hl_cb_put(parser->patched_cb);54645465out:5466/*5467* Always call cb destroy here because we still have 1 reference5468* to it by calling cb_get earlier. After the job will be completed,5469* cb_put will release it, but here we want to remove it from the5470* idr5471*/5472hl_cb_destroy(&hdev->kernel_mem_mgr, handle);54735474free_userptr:5475if (rc)5476hl_userptr_delete_list(hdev, parser->job_userptr_list);5477return rc;5478}54795480static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,5481struct hl_cs_parser *parser)5482{5483struct asic_fixed_properties *asic_prop = &hdev->asic_prop;5484struct gaudi_device *gaudi = hdev->asic_specific;5485u32 nic_queue_offset, nic_mask_q_id;54865487if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&5488(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {5489nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;5490nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));54915492if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {5493dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);5494return -EINVAL;5495}5496}54975498/* For internal queue jobs just check if CB address is valid */5499if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5500parser->user_cb_size,5501asic_prop->sram_user_base_address,5502asic_prop->sram_end_address))5503return 0;55045505if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5506parser->user_cb_size,5507asic_prop->dram_user_base_address,5508asic_prop->dram_end_address))5509return 0;55105511/* PMMU and HPMMU addresses are equal, check only one of them */5512if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5513parser->user_cb_size,5514asic_prop->pmmu.start_addr,5515asic_prop->pmmu.end_addr))5516return 0;55175518dev_err(hdev->dev,5519"CB address 0x%px + 0x%x for internal QMAN is not valid\n",5520parser->user_cb, parser->user_cb_size);55215522return -EFAULT;5523}55245525static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)5526{5527struct gaudi_device *gaudi = hdev->asic_specific;55285529if (parser->queue_type == QUEUE_TYPE_INT)5530return gaudi_parse_cb_no_ext_queue(hdev, parser);55315532if (gaudi->hw_cap_initialized & HW_CAP_MMU)5533return gaudi_parse_cb_mmu(hdev, parser);5534else5535return gaudi_parse_cb_no_mmu(hdev, parser);5536}55375538static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,5539u32 len, u32 original_len, u64 cq_addr, u32 cq_val,5540u32 msi_vec, bool eb)5541{5542struct packet_msg_prot *cq_pkt;5543struct packet_nop *cq_padding;5544u64 msi_addr;5545u32 tmp;55465547cq_padding = kernel_address + original_len;5548cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);55495550while ((void *)cq_padding < (void *)cq_pkt) {5551cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));5552cq_padding++;5553}55545555tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);5556tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);55575558if (eb)5559tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);55605561cq_pkt->ctl = cpu_to_le32(tmp);5562cq_pkt->value = cpu_to_le32(cq_val);5563cq_pkt->addr = cpu_to_le64(cq_addr);55645565cq_pkt++;55665567tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);5568tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);5569cq_pkt->ctl = cpu_to_le32(tmp);5570cq_pkt->value = cpu_to_le32(1);5571msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;5572cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);5573}55745575static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)5576{5577WREG32(mmCPU_IF_EQ_RD_OFFS, val);5578}55795580static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,5581u32 size, u64 val)5582{5583struct packet_lin_dma *lin_dma_pkt;5584struct hl_cs_job *job;5585u32 cb_size, ctl, err_cause;5586struct hl_cb *cb;5587int rc;55885589cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);5590if (!cb)5591return -EFAULT;55925593lin_dma_pkt = cb->kernel_address;5594memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));5595cb_size = sizeof(*lin_dma_pkt);55965597ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);5598ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);5599ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);5600ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);5601ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);56025603lin_dma_pkt->ctl = cpu_to_le32(ctl);5604lin_dma_pkt->src_addr = cpu_to_le64(val);5605lin_dma_pkt->dst_addr |= cpu_to_le64(addr);5606lin_dma_pkt->tsize = cpu_to_le32(size);56075608job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);5609if (!job) {5610dev_err(hdev->dev, "Failed to allocate a new job\n");5611rc = -ENOMEM;5612goto release_cb;5613}56145615/* Verify DMA is OK */5616err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);5617if (err_cause && !hdev->init_done) {5618dev_dbg(hdev->dev,5619"Clearing DMA0 engine from errors (cause 0x%x)\n",5620err_cause);5621WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);5622}56235624job->id = 0;5625job->user_cb = cb;5626atomic_inc(&job->user_cb->cs_cnt);5627job->user_cb_size = cb_size;5628job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;5629job->patched_cb = job->user_cb;5630job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);56315632hl_debugfs_add_job(hdev, job);56335634rc = gaudi_send_job_on_qman0(hdev, job);5635hl_debugfs_remove_job(hdev, job);5636kfree(job);5637atomic_dec(&cb->cs_cnt);56385639/* Verify DMA is OK */5640err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);5641if (err_cause) {5642dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);5643rc = -EIO;5644if (!hdev->init_done) {5645dev_dbg(hdev->dev,5646"Clearing DMA0 engine from errors (cause 0x%x)\n",5647err_cause);5648WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);5649}5650}56515652release_cb:5653hl_cb_put(cb);5654hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);56555656return rc;5657}56585659static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,5660u32 num_regs, u32 val)5661{5662struct packet_msg_long *pkt;5663struct hl_cs_job *job;5664u32 cb_size, ctl;5665struct hl_cb *cb;5666int i, rc;56675668cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);56695670if (cb_size > SZ_2M) {5671dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);5672return -ENOMEM;5673}56745675cb = hl_cb_kernel_create(hdev, cb_size, false);5676if (!cb)5677return -EFAULT;56785679pkt = cb->kernel_address;56805681ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */5682ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);5683ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);5684ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);5685ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);56865687for (i = 0; i < num_regs ; i++, pkt++) {5688pkt->ctl = cpu_to_le32(ctl);5689pkt->value = cpu_to_le32(val);5690pkt->addr = cpu_to_le64(reg_base + (i * 4));5691}56925693job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);5694if (!job) {5695dev_err(hdev->dev, "Failed to allocate a new job\n");5696rc = -ENOMEM;5697goto release_cb;5698}56995700job->id = 0;5701job->user_cb = cb;5702atomic_inc(&job->user_cb->cs_cnt);5703job->user_cb_size = cb_size;5704job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;5705job->patched_cb = job->user_cb;5706job->job_cb_size = cb_size;57075708hl_debugfs_add_job(hdev, job);57095710rc = gaudi_send_job_on_qman0(hdev, job);5711hl_debugfs_remove_job(hdev, job);5712kfree(job);5713atomic_dec(&cb->cs_cnt);57145715release_cb:5716hl_cb_put(cb);5717hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);57185719return rc;5720}57215722static int gaudi_restore_sm_registers(struct hl_device *hdev)5723{5724u64 base_addr;5725u32 num_regs;5726int rc;57275728base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5729num_regs = NUM_OF_SOB_IN_BLOCK;5730rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5731if (rc) {5732dev_err(hdev->dev, "failed resetting SM registers");5733return -ENOMEM;5734}57355736base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;5737num_regs = NUM_OF_SOB_IN_BLOCK;5738rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5739if (rc) {5740dev_err(hdev->dev, "failed resetting SM registers");5741return -ENOMEM;5742}57435744base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5745num_regs = NUM_OF_SOB_IN_BLOCK;5746rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5747if (rc) {5748dev_err(hdev->dev, "failed resetting SM registers");5749return -ENOMEM;5750}57515752base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;5753num_regs = NUM_OF_MONITORS_IN_BLOCK;5754rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5755if (rc) {5756dev_err(hdev->dev, "failed resetting SM registers");5757return -ENOMEM;5758}57595760base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;5761num_regs = NUM_OF_MONITORS_IN_BLOCK;5762rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5763if (rc) {5764dev_err(hdev->dev, "failed resetting SM registers");5765return -ENOMEM;5766}57675768base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;5769num_regs = NUM_OF_MONITORS_IN_BLOCK;5770rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5771if (rc) {5772dev_err(hdev->dev, "failed resetting SM registers");5773return -ENOMEM;5774}57755776base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +5777(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);5778num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;5779rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5780if (rc) {5781dev_err(hdev->dev, "failed resetting SM registers");5782return -ENOMEM;5783}57845785base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +5786(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);5787num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;5788rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5789if (rc) {5790dev_err(hdev->dev, "failed resetting SM registers");5791return -ENOMEM;5792}57935794return 0;5795}57965797static void gaudi_restore_dma_registers(struct hl_device *hdev)5798{5799u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -5800mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5801int i;58025803for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {5804u64 sob_addr = CFG_BASE +5805mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +5806(i * sob_delta);5807u32 dma_offset = i * DMA_CORE_OFFSET;58085809WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,5810lower_32_bits(sob_addr));5811WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,5812upper_32_bits(sob_addr));5813WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);58145815/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be5816* modified by the user for SRAM reduction5817*/5818if (i > 1)5819WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,58200x00000001);5821}5822}58235824static void gaudi_restore_qm_registers(struct hl_device *hdev)5825{5826u32 qman_offset;5827int i;58285829for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {5830qman_offset = i * DMA_QMAN_OFFSET;5831WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);5832}58335834for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {5835qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);5836WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);5837}58385839for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {5840qman_offset = i * TPC_QMAN_OFFSET;5841WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);5842}58435844for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {5845qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +5846(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;5847WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);5848}5849}58505851static int gaudi_restore_user_registers(struct hl_device *hdev)5852{5853int rc;58545855rc = gaudi_restore_sm_registers(hdev);5856if (rc)5857return rc;58585859gaudi_restore_dma_registers(hdev);5860gaudi_restore_qm_registers(hdev);58615862return 0;5863}58645865static int gaudi_context_switch(struct hl_device *hdev, u32 asid)5866{5867return 0;5868}58695870static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)5871{5872u32 size = hdev->asic_prop.mmu_pgt_size +5873hdev->asic_prop.mmu_cache_mng_size;5874struct gaudi_device *gaudi = hdev->asic_specific;5875u64 addr = hdev->asic_prop.mmu_pgt_addr;58765877if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))5878return 0;58795880return gaudi_memset_device_memory(hdev, addr, size, 0);5881}58825883static void gaudi_restore_phase_topology(struct hl_device *hdev)5884{58855886}58875888static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,5889u32 size_to_dma, dma_addr_t dma_addr)5890{5891u32 err_cause, val;5892u64 dma_offset;5893int rc;58945895dma_offset = dma_id * DMA_CORE_OFFSET;58965897WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));5898WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));5899WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));5900WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));5901WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);5902WREG32(mmDMA0_CORE_COMMIT + dma_offset,5903(1 << DMA0_CORE_COMMIT_LIN_SHIFT));59045905rc = hl_poll_timeout(5906hdev,5907mmDMA0_CORE_STS0 + dma_offset,5908val,5909((val & DMA0_CORE_STS0_BUSY_MASK) == 0),59100,59111000000);59125913if (rc) {5914dev_err(hdev->dev,5915"DMA %d timed-out during reading of 0x%llx\n",5916dma_id, addr);5917return -EIO;5918}59195920/* Verify DMA is OK */5921err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);5922if (err_cause) {5923dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);5924dev_dbg(hdev->dev,5925"Clearing DMA0 engine from errors (cause 0x%x)\n",5926err_cause);5927WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);59285929return -EIO;5930}59315932return 0;5933}59345935static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,5936void *blob_addr)5937{5938u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;5939u32 qm_glbl_sts0, qm_cgm_sts;5940u64 dma_offset, qm_offset;5941dma_addr_t dma_addr;5942void *kernel_addr;5943bool is_eng_idle;5944int rc = 0, dma_id;59455946kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);59475948if (!kernel_addr)5949return -ENOMEM;59505951hdev->asic_funcs->hw_queues_lock(hdev);59525953dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];5954dma_offset = dma_id * DMA_CORE_OFFSET;5955qm_offset = dma_id * DMA_QMAN_OFFSET;5956dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);5957qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);5958qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);5959is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&5960IS_DMA_IDLE(dma_core_sts0);59615962if (!is_eng_idle) {5963dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];5964dma_offset = dma_id * DMA_CORE_OFFSET;5965qm_offset = dma_id * DMA_QMAN_OFFSET;5966dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);5967qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);5968qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);5969is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&5970IS_DMA_IDLE(dma_core_sts0);59715972if (!is_eng_idle) {5973dev_err_ratelimited(hdev->dev,5974"Can't read via DMA because it is BUSY\n");5975rc = -EAGAIN;5976goto out;5977}5978}59795980cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);5981WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,59820xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);59835984/* TODO: remove this by mapping the DMA temporary buffer to the MMU5985* using the compute ctx ASID, if exists. If not, use the kernel ctx5986* ASID5987*/5988WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));59895990/* Verify DMA is OK */5991err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);5992if (err_cause) {5993dev_dbg(hdev->dev,5994"Clearing DMA0 engine from errors (cause 0x%x)\n",5995err_cause);5996WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);5997}59985999pos = 0;6000size_left = size;6001size_to_dma = SZ_2M;60026003while (size_left > 0) {60046005if (size_left < SZ_2M)6006size_to_dma = size_left;60076008rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,6009dma_addr);6010if (rc)6011break;60126013memcpy(blob_addr + pos, kernel_addr, size_to_dma);60146015if (size_left <= SZ_2M)6016break;60176018pos += SZ_2M;6019addr += SZ_2M;6020size_left -= SZ_2M;6021}60226023/* TODO: remove this by mapping the DMA temporary buffer to the MMU6024* using the compute ctx ASID, if exists. If not, use the kernel ctx6025* ASID6026*/6027WREG32_AND(mmDMA0_CORE_PROT + dma_offset,6028~BIT(DMA0_CORE_PROT_VAL_SHIFT));60296030WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);60316032out:6033hdev->asic_funcs->hw_queues_unlock(hdev);60346035hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);60366037return rc;6038}60396040static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)6041{6042struct gaudi_device *gaudi = hdev->asic_specific;60436044if (hdev->reset_info.hard_reset_pending)6045return U64_MAX;60466047return readq(hdev->pcie_bar[HBM_BAR_ID] +6048(addr - gaudi->hbm_bar_cur_addr));6049}60506051static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)6052{6053struct gaudi_device *gaudi = hdev->asic_specific;60546055if (hdev->reset_info.hard_reset_pending)6056return;60576058writeq(val, hdev->pcie_bar[HBM_BAR_ID] +6059(addr - gaudi->hbm_bar_cur_addr));6060}60616062void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)6063{6064/* mask to zero the MMBP and ASID bits */6065WREG32_AND(reg, ~0x7FF);6066WREG32_OR(reg, asid);6067}60686069static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)6070{6071struct gaudi_device *gaudi = hdev->asic_specific;60726073if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))6074return;60756076if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {6077dev_crit(hdev->dev, "asid %u is too big\n", asid);6078return;6079}60806081gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);6082gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);6083gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);6084gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);6085gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);60866087gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);6088gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);6089gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);6090gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);6091gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);60926093gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);6094gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);6095gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);6096gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);6097gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);60986099gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);6100gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);6101gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);6102gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);6103gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);61046105gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);6106gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);6107gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);6108gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);6109gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);61106111gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);6112gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);6113gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);6114gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);6115gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);61166117gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);6118gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);6119gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);6120gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);6121gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);61226123gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);6124gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);6125gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);6126gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);6127gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);61286129gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);6130gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);6131gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);6132gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);6133gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);6134gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);6135gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);6136gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);61376138gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);6139gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);6140gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);6141gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);6142gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);6143gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);6144gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);61456146gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);6147gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);6148gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);6149gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);6150gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);6151gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);6152gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);61536154gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);6155gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);6156gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);6157gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);6158gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);6159gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);6160gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);61616162gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);6163gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);6164gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);6165gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);6166gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);6167gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);6168gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);61696170gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);6171gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);6172gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);6173gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);6174gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);6175gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);6176gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);61776178gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);6179gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);6180gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);6181gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);6182gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);6183gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);6184gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);61856186gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);6187gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);6188gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);6189gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);6190gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);6191gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);6192gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);61936194gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);6195gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);6196gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);6197gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);6198gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);6199gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);6200gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);62016202gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);6203gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);6204gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);6205gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);6206gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);6207gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);6208gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);6209gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);6210gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);6211gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);62126213gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);6214gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);6215gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);6216gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);6217gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);6218gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);6219gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);6220gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);6221gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);6222gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);6223gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);6224gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);62256226if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {6227gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,6228asid);6229gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,6230asid);6231gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,6232asid);6233gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,6234asid);6235gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,6236asid);6237}62386239if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {6240gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,6241asid);6242gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,6243asid);6244gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,6245asid);6246gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,6247asid);6248gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,6249asid);6250}62516252if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {6253gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,6254asid);6255gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,6256asid);6257gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,6258asid);6259gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,6260asid);6261gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,6262asid);6263}62646265if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {6266gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,6267asid);6268gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,6269asid);6270gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,6271asid);6272gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,6273asid);6274gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,6275asid);6276}62776278if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {6279gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,6280asid);6281gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,6282asid);6283gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,6284asid);6285gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,6286asid);6287gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,6288asid);6289}62906291if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {6292gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,6293asid);6294gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,6295asid);6296gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,6297asid);6298gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,6299asid);6300gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,6301asid);6302}63036304if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {6305gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,6306asid);6307gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,6308asid);6309gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,6310asid);6311gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,6312asid);6313gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,6314asid);6315}63166317if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {6318gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,6319asid);6320gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,6321asid);6322gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,6323asid);6324gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,6325asid);6326gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,6327asid);6328}63296330if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {6331gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,6332asid);6333gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,6334asid);6335gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,6336asid);6337gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,6338asid);6339gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,6340asid);6341}63426343if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {6344gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,6345asid);6346gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,6347asid);6348gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,6349asid);6350gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,6351asid);6352gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,6353asid);6354}63556356gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);6357gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);6358}63596360static int gaudi_send_job_on_qman0(struct hl_device *hdev,6361struct hl_cs_job *job)6362{6363struct packet_msg_prot *fence_pkt;6364u32 *fence_ptr;6365dma_addr_t fence_dma_addr;6366struct hl_cb *cb;6367u32 tmp, timeout, dma_offset;6368int rc;63696370if (hdev->pldm)6371timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;6372else6373timeout = HL_DEVICE_TIMEOUT_USEC;63746375fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);6376if (!fence_ptr) {6377dev_err(hdev->dev,6378"Failed to allocate fence memory for QMAN0\n");6379return -ENOMEM;6380}63816382cb = job->patched_cb;63836384fence_pkt = cb->kernel_address +6385job->job_cb_size - sizeof(struct packet_msg_prot);63866387tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);6388tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);6389tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);63906391fence_pkt->ctl = cpu_to_le32(tmp);6392fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);6393fence_pkt->addr = cpu_to_le64(fence_dma_addr);63946395dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;63966397WREG32(mmDMA0_CORE_PROT + dma_offset,6398BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));63996400rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,6401job->job_cb_size, cb->bus_address);6402if (rc) {6403dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);6404goto free_fence_ptr;6405}64066407rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,6408(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,6409timeout, true);64106411hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);64126413if (rc == -ETIMEDOUT) {6414dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);6415goto free_fence_ptr;6416}64176418free_fence_ptr:6419WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));64206421hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);6422return rc;6423}64246425static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)6426{6427if (event_type >= GAUDI_EVENT_SIZE)6428goto event_not_supported;64296430if (!gaudi_irq_map_table[event_type].valid)6431goto event_not_supported;64326433snprintf(desc, size, gaudi_irq_map_table[event_type].name);64346435return;64366437event_not_supported:6438snprintf(desc, size, "N/A");6439}64406441static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,6442bool is_write, u16 *engine_id_1,6443u16 *engine_id_2)6444{6445u32 dma_id[2], dma_offset, err_cause[2], mask, i;64466447mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :6448DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;64496450switch (x_y) {6451case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6452case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6453dma_id[0] = 0;6454dma_id[1] = 2;6455break;6456case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6457case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6458dma_id[0] = 1;6459dma_id[1] = 3;6460break;6461case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6462case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6463dma_id[0] = 4;6464dma_id[1] = 6;6465break;6466case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6467case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6468dma_id[0] = 5;6469dma_id[1] = 7;6470break;6471default:6472goto unknown_initiator;6473}64746475for (i = 0 ; i < 2 ; i++) {6476dma_offset = dma_id[i] * DMA_CORE_OFFSET;6477err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);6478}64796480switch (x_y) {6481case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6482case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6483if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6484*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;6485return "DMA0";6486} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6487*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;6488return "DMA2";6489} else {6490*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;6491*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;6492return "DMA0 or DMA2";6493}6494case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6495case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6496if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6497*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;6498return "DMA1";6499} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6500*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;6501return "DMA3";6502} else {6503*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;6504*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;6505return "DMA1 or DMA3";6506}6507case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6508case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6509if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6510*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;6511return "DMA4";6512} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6513*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;6514return "DMA6";6515} else {6516*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;6517*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;6518return "DMA4 or DMA6";6519}6520case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6521case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6522if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6523*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;6524return "DMA5";6525} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6526*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;6527return "DMA7";6528} else {6529*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;6530*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;6531return "DMA5 or DMA7";6532}6533}65346535unknown_initiator:6536return "unknown initiator";6537}65386539static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,6540u16 *engine_id_1, u16 *engine_id_2)6541{6542u32 val, x_y, axi_id;65436544val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :6545RREG32(mmMMU_UP_RAZWI_READ_ID);6546x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |6547(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));6548axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<6549RAZWI_INITIATOR_AXI_ID_SHIFT);65506551switch (x_y) {6552case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:6553if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6554*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;6555return "TPC0";6556}6557if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6558*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;6559return "NIC0";6560}6561break;6562case RAZWI_INITIATOR_ID_X_Y_TPC1:6563*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;6564return "TPC1";6565case RAZWI_INITIATOR_ID_X_Y_MME0_0:6566case RAZWI_INITIATOR_ID_X_Y_MME0_1:6567*engine_id_1 = GAUDI_ENGINE_ID_MME_0;6568return "MME0";6569case RAZWI_INITIATOR_ID_X_Y_MME1_0:6570case RAZWI_INITIATOR_ID_X_Y_MME1_1:6571*engine_id_1 = GAUDI_ENGINE_ID_MME_1;6572return "MME1";6573case RAZWI_INITIATOR_ID_X_Y_TPC2:6574*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;6575return "TPC2";6576case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:6577if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6578*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;6579return "TPC3";6580}6581/* PCI, CPU or PSOC does not have engine id*/6582if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))6583return "PCI";6584if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))6585return "CPU";6586if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))6587return "PSOC";6588break;6589case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6590case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6591case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6592case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6593case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6594case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6595case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6596case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6597return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,6598engine_id_1, engine_id_2);6599case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:6600if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6601*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;6602return "TPC4";6603}6604if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6605*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;6606return "NIC1";6607}6608if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {6609*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;6610return "NIC2";6611}6612break;6613case RAZWI_INITIATOR_ID_X_Y_TPC5:6614*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;6615return "TPC5";6616case RAZWI_INITIATOR_ID_X_Y_MME2_0:6617case RAZWI_INITIATOR_ID_X_Y_MME2_1:6618*engine_id_1 = GAUDI_ENGINE_ID_MME_2;6619return "MME2";6620case RAZWI_INITIATOR_ID_X_Y_MME3_0:6621case RAZWI_INITIATOR_ID_X_Y_MME3_1:6622*engine_id_1 = GAUDI_ENGINE_ID_MME_3;6623return "MME3";6624case RAZWI_INITIATOR_ID_X_Y_TPC6:6625*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;6626return "TPC6";6627case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:6628if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6629*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;6630return "TPC7";6631}6632if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6633*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;6634return "NIC4";6635}6636if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {6637*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;6638return "NIC5";6639}6640break;6641default:6642break;6643}66446645dev_err(hdev->dev,6646"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",6647val,6648(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,6649(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,6650(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &6651RAZWI_INITIATOR_AXI_ID_MASK);66526653return "unknown initiator";6654}66556656static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,6657u16 *engine_id_2, bool *is_read, bool *is_write)6658{66596660if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {6661dev_err_ratelimited(hdev->dev,6662"RAZWI event caused by illegal write of %s\n",6663gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));6664WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);6665*is_write = true;6666}66676668if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {6669dev_err_ratelimited(hdev->dev,6670"RAZWI event caused by illegal read of %s\n",6671gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));6672WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);6673*is_read = true;6674}6675}66766677static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)6678{6679struct gaudi_device *gaudi = hdev->asic_specific;6680u32 val;66816682if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))6683return;66846685val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);6686if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {6687*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;6688*addr <<= 32;6689*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);66906691dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);6692hl_handle_page_fault(hdev, *addr, 0, true, event_mask);66936694WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);6695}66966697val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);6698if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {6699*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;6700*addr <<= 32;6701*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);67026703dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);67046705WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);6706}6707}67086709/*6710* +-------------------+------------------------------------------------------+6711* | Configuration Reg | Description |6712* | Address | |6713* +-------------------+------------------------------------------------------+6714* | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|6715* | |0xF30 memory wrappers 31:0 (MSB to LSB) |6716* | |0xF34 memory wrappers 63:32 |6717* | |0xF38 memory wrappers 95:64 |6718* | |0xF3C memory wrappers 127:96 |6719* +-------------------+------------------------------------------------------+6720* | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|6721* | |0xF40 memory wrappers 31:0 (MSB to LSB) |6722* | |0xF44 memory wrappers 63:32 |6723* | |0xF48 memory wrappers 95:64 |6724* | |0xF4C memory wrappers 127:96 |6725* +-------------------+------------------------------------------------------+6726*/6727static int gaudi_extract_ecc_info(struct hl_device *hdev,6728struct ecc_info_extract_params *params, u64 *ecc_address,6729u64 *ecc_syndrom, u8 *memory_wrapper_idx)6730{6731u32 i, num_mem_regs, reg, err_bit;6732u64 err_addr, err_word = 0;67336734num_mem_regs = params->num_memories / 32 +6735((params->num_memories % 32) ? 1 : 0);67366737if (params->block_address >= CFG_BASE)6738params->block_address -= CFG_BASE;67396740if (params->derr)6741err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;6742else6743err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;67446745/* Set invalid wrapper index */6746*memory_wrapper_idx = 0xFF;67476748/* Iterate through memory wrappers, a single bit must be set */6749for (i = 0 ; i < num_mem_regs ; i++) {6750err_addr += i * 4;6751err_word = RREG32(err_addr);6752if (err_word) {6753err_bit = __ffs(err_word);6754*memory_wrapper_idx = err_bit + (32 * i);6755break;6756}6757}67586759if (*memory_wrapper_idx == 0xFF) {6760dev_err(hdev->dev, "ECC error information cannot be found\n");6761return -EINVAL;6762}67636764WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,6765*memory_wrapper_idx);67666767*ecc_address =6768RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);6769*ecc_syndrom =6770RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);67716772/* Clear error indication */6773reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);6774if (params->derr)6775reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);6776else6777reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);67786779WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);67806781return 0;6782}67836784/*6785* gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap6786*6787* @idx: the current pi/ci value6788* @q_len: the queue length (power of 2)6789*6790* @return the cyclically decremented index6791*/6792static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)6793{6794u32 mask = q_len - 1;67956796/*6797* modular decrement is equivalent to adding (queue_size -1)6798* later we take LSBs to make sure the value is in the6799* range [0, queue_len - 1]6800*/6801return (idx + q_len - 1) & mask;6802}68036804/**6805* gaudi_handle_sw_config_stream_data - print SW config stream data6806*6807* @hdev: pointer to the habanalabs device structure6808* @stream: the QMAN's stream6809* @qman_base: base address of QMAN registers block6810* @event_mask: mask of the last events occurred6811*/6812static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,6813u64 qman_base, u64 event_mask)6814{6815u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;6816u32 cq_ptr_lo_off, size;68176818cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;68196820cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +6821stream * cq_ptr_lo_off;6822cq_ptr_hi = cq_ptr_lo +6823(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);6824cq_tsize = cq_ptr_lo +6825(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);68266827cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);6828size = RREG32(cq_tsize);6829dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",6830stream, cq_ptr, size);68316832if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {6833hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;6834hdev->captured_err_info.undef_opcode.cq_size = size;6835hdev->captured_err_info.undef_opcode.stream_id = stream;6836}6837}68386839/**6840* gaudi_handle_last_pqes_on_err - print last PQEs on error6841*6842* @hdev: pointer to the habanalabs device structure6843* @qid_base: first QID of the QMAN (out of 4 streams)6844* @stream: the QMAN's stream6845* @qman_base: base address of QMAN registers block6846* @event_mask: mask of the last events occurred6847* @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)6848*/6849static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,6850u32 stream, u64 qman_base,6851u64 event_mask,6852bool pr_sw_conf)6853{6854u32 ci, qm_ci_stream_off, queue_len;6855struct hl_hw_queue *q;6856u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];6857int i;68586859q = &hdev->kernel_queues[qid_base + stream];68606861qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;6862pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +6863stream * qm_ci_stream_off;68646865queue_len = (q->queue_type == QUEUE_TYPE_INT) ?6866q->int_queue_len : HL_QUEUE_LENGTH;68676868hdev->asic_funcs->hw_queues_lock(hdev);68696870if (pr_sw_conf)6871gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);68726873ci = RREG32(pq_ci);68746875/* we should start printing form ci -1 */6876ci = gaudi_queue_idx_dec(ci, queue_len);6877memset(addr, 0, sizeof(addr));68786879for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {6880struct hl_bd *bd;6881u32 len;68826883bd = q->kernel_address;6884bd += ci;68856886len = le32_to_cpu(bd->len);6887/* len 0 means uninitialized entry- break */6888if (!len)6889break;68906891addr[i] = le64_to_cpu(bd->ptr);68926893dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",6894stream, ci, addr[i], len);68956896/* get previous ci, wrap if needed */6897ci = gaudi_queue_idx_dec(ci, queue_len);6898}68996900if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {6901struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;6902u32 arr_idx = undef_opcode->cb_addr_streams_len;69036904if (arr_idx == 0) {6905undef_opcode->timestamp = ktime_get();6906undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];6907}69086909memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));6910undef_opcode->cb_addr_streams_len++;6911}69126913hdev->asic_funcs->hw_queues_unlock(hdev);6914}69156916/**6917* handle_qman_data_on_err - extract QMAN data on error6918*6919* @hdev: pointer to the habanalabs device structure6920* @qid_base: first QID of the QMAN (out of 4 streams)6921* @stream: the QMAN's stream6922* @qman_base: base address of QMAN registers block6923* @event_mask: mask of the last events occurred6924*6925* This function attempt to exatract as much data as possible on QMAN error.6926* On upper CP print the SW config stream data and last 8 PQEs.6927* On lower CP print SW config data and last PQEs of ALL 4 upper CPs6928*/6929static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,6930u32 stream, u64 qman_base, u64 event_mask)6931{6932u32 i;69336934if (stream != QMAN_STREAMS) {6935gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,6936qman_base, event_mask, true);6937return;6938}69396940/* handle Lower-CP */6941gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);69426943for (i = 0; i < QMAN_STREAMS; i++)6944gaudi_handle_last_pqes_on_err(hdev, qid_base, i,6945qman_base, event_mask, false);6946}69476948static void gaudi_handle_qman_err_generic(struct hl_device *hdev,6949const char *qm_name,6950u64 qman_base,6951u32 qid_base,6952u64 *event_mask)6953{6954u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;6955u64 glbl_sts_addr, arb_err_addr;6956char reg_desc[32];69576958glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);6959arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);69606961/* Iterate through all stream GLBL_STS1 registers + Lower CP */6962for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {6963glbl_sts_clr_val = 0;6964glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);69656966if (!glbl_sts_val)6967continue;69686969if (i == QMAN_STREAMS)6970snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");6971else6972snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);69736974for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {6975if (glbl_sts_val & BIT(j)) {6976dev_err_ratelimited(hdev->dev,6977"%s %s. err cause: %s\n",6978qm_name, reg_desc,6979gaudi_qman_error_cause[j]);6980glbl_sts_clr_val |= BIT(j);6981}6982}6983/* check for undefined opcode */6984if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&6985hdev->captured_err_info.undef_opcode.write_enable) {6986memset(&hdev->captured_err_info.undef_opcode, 0,6987sizeof(hdev->captured_err_info.undef_opcode));69886989hdev->captured_err_info.undef_opcode.write_enable = false;6990*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;6991}69926993/* Write 1 clear errors */6994if (!hdev->stop_on_err)6995WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);6996else6997handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);6998}69997000arb_err_val = RREG32(arb_err_addr);70017002if (!arb_err_val)7003return;70047005for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {7006if (arb_err_val & BIT(j)) {7007dev_err_ratelimited(hdev->dev,7008"%s ARB_ERR. err cause: %s\n",7009qm_name,7010gaudi_qman_arb_error_cause[j]);7011}7012}7013}70147015static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,7016struct hl_eq_sm_sei_data *sei_data)7017{7018u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;70197020/* Flip the bits as the enum is ordered in the opposite way */7021index = (index ^ 0x3) & 0x3;70227023switch (sei_data->sei_cause) {7024case SM_SEI_SO_OVERFLOW:7025dev_err_ratelimited(hdev->dev,7026"%s SEI Error: SOB Group %u overflow/underflow",7027gaudi_sync_manager_names[index],7028le32_to_cpu(sei_data->sei_log));7029break;7030case SM_SEI_LBW_4B_UNALIGNED:7031dev_err_ratelimited(hdev->dev,7032"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",7033gaudi_sync_manager_names[index],7034le32_to_cpu(sei_data->sei_log));7035break;7036case SM_SEI_AXI_RESPONSE_ERR:7037dev_err_ratelimited(hdev->dev,7038"%s SEI Error: AXI ID %u response error",7039gaudi_sync_manager_names[index],7040le32_to_cpu(sei_data->sei_log));7041break;7042default:7043dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",7044le32_to_cpu(sei_data->sei_log));7045break;7046}7047}70487049static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,7050struct hl_eq_ecc_data *ecc_data)7051{7052struct ecc_info_extract_params params;7053u64 ecc_address = 0, ecc_syndrom = 0;7054u8 index, memory_wrapper_idx = 0;7055bool extract_info_from_fw;7056int rc;70577058if (hdev->asic_prop.fw_security_enabled) {7059extract_info_from_fw = true;7060goto extract_ecc_info;7061}70627063switch (event_type) {7064case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:7065case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:7066extract_info_from_fw = true;7067break;7068case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:7069index = event_type - GAUDI_EVENT_TPC0_SERR;7070params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;7071params.num_memories = 90;7072params.derr = false;7073extract_info_from_fw = false;7074break;7075case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:7076index = event_type - GAUDI_EVENT_TPC0_DERR;7077params.block_address =7078mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;7079params.num_memories = 90;7080params.derr = true;7081extract_info_from_fw = false;7082break;7083case GAUDI_EVENT_MME0_ACC_SERR:7084case GAUDI_EVENT_MME1_ACC_SERR:7085case GAUDI_EVENT_MME2_ACC_SERR:7086case GAUDI_EVENT_MME3_ACC_SERR:7087index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;7088params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;7089params.num_memories = 128;7090params.derr = false;7091extract_info_from_fw = false;7092break;7093case GAUDI_EVENT_MME0_ACC_DERR:7094case GAUDI_EVENT_MME1_ACC_DERR:7095case GAUDI_EVENT_MME2_ACC_DERR:7096case GAUDI_EVENT_MME3_ACC_DERR:7097index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;7098params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;7099params.num_memories = 128;7100params.derr = true;7101extract_info_from_fw = false;7102break;7103case GAUDI_EVENT_MME0_SBAB_SERR:7104case GAUDI_EVENT_MME1_SBAB_SERR:7105case GAUDI_EVENT_MME2_SBAB_SERR:7106case GAUDI_EVENT_MME3_SBAB_SERR:7107index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;7108params.block_address =7109mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;7110params.num_memories = 33;7111params.derr = false;7112extract_info_from_fw = false;7113break;7114case GAUDI_EVENT_MME0_SBAB_DERR:7115case GAUDI_EVENT_MME1_SBAB_DERR:7116case GAUDI_EVENT_MME2_SBAB_DERR:7117case GAUDI_EVENT_MME3_SBAB_DERR:7118index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;7119params.block_address =7120mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;7121params.num_memories = 33;7122params.derr = true;7123extract_info_from_fw = false;7124break;7125default:7126return;7127}71287129extract_ecc_info:7130if (extract_info_from_fw) {7131ecc_address = le64_to_cpu(ecc_data->ecc_address);7132ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);7133memory_wrapper_idx = ecc_data->memory_wrapper_idx;7134} else {7135rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address,7136&ecc_syndrom, &memory_wrapper_idx);7137if (rc)7138return;7139}71407141dev_err(hdev->dev,7142"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",7143ecc_address, ecc_syndrom, memory_wrapper_idx);7144}71457146static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)7147{7148u64 qman_base;7149char desc[32];7150u32 qid_base;7151u8 index;71527153switch (event_type) {7154case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:7155index = event_type - GAUDI_EVENT_TPC0_QM;7156qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;7157qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;7158snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);7159break;7160case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:7161if (event_type == GAUDI_EVENT_MME0_QM) {7162index = 0;7163qid_base = GAUDI_QUEUE_ID_MME_0_0;7164} else { /* event_type == GAUDI_EVENT_MME2_QM */7165index = 2;7166qid_base = GAUDI_QUEUE_ID_MME_1_0;7167}7168qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;7169snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);7170break;7171case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:7172index = event_type - GAUDI_EVENT_DMA0_QM;7173qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;7174/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */7175if (index > 1)7176qid_base++;7177qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;7178snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);7179break;7180case GAUDI_EVENT_NIC0_QM0:7181qid_base = GAUDI_QUEUE_ID_NIC_0_0;7182qman_base = mmNIC0_QM0_BASE;7183snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");7184break;7185case GAUDI_EVENT_NIC0_QM1:7186qid_base = GAUDI_QUEUE_ID_NIC_1_0;7187qman_base = mmNIC0_QM1_BASE;7188snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");7189break;7190case GAUDI_EVENT_NIC1_QM0:7191qid_base = GAUDI_QUEUE_ID_NIC_2_0;7192qman_base = mmNIC1_QM0_BASE;7193snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");7194break;7195case GAUDI_EVENT_NIC1_QM1:7196qid_base = GAUDI_QUEUE_ID_NIC_3_0;7197qman_base = mmNIC1_QM1_BASE;7198snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");7199break;7200case GAUDI_EVENT_NIC2_QM0:7201qid_base = GAUDI_QUEUE_ID_NIC_4_0;7202qman_base = mmNIC2_QM0_BASE;7203snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");7204break;7205case GAUDI_EVENT_NIC2_QM1:7206qid_base = GAUDI_QUEUE_ID_NIC_5_0;7207qman_base = mmNIC2_QM1_BASE;7208snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");7209break;7210case GAUDI_EVENT_NIC3_QM0:7211qid_base = GAUDI_QUEUE_ID_NIC_6_0;7212qman_base = mmNIC3_QM0_BASE;7213snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");7214break;7215case GAUDI_EVENT_NIC3_QM1:7216qid_base = GAUDI_QUEUE_ID_NIC_7_0;7217qman_base = mmNIC3_QM1_BASE;7218snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");7219break;7220case GAUDI_EVENT_NIC4_QM0:7221qid_base = GAUDI_QUEUE_ID_NIC_8_0;7222qman_base = mmNIC4_QM0_BASE;7223snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");7224break;7225case GAUDI_EVENT_NIC4_QM1:7226qid_base = GAUDI_QUEUE_ID_NIC_9_0;7227qman_base = mmNIC4_QM1_BASE;7228snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");7229break;7230default:7231return;7232}72337234gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);7235}72367237static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,7238bool check_razwi, u64 *event_mask)7239{7240bool is_read = false, is_write = false;7241u16 engine_id[2], num_of_razwi_eng = 0;7242char desc[64] = "";7243u64 razwi_addr = 0;7244u8 razwi_flags = 0;72457246/*7247* Init engine id by default as not valid and only if razwi initiated from engine with7248* engine id it will get valid value.7249*/7250engine_id[0] = HL_RAZWI_NA_ENG_ID;7251engine_id[1] = HL_RAZWI_NA_ENG_ID;72527253gaudi_get_event_desc(event_type, desc, sizeof(desc));7254dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",7255event_type, desc);72567257if (check_razwi) {7258gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,7259&is_write);7260gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);72617262if (is_read)7263razwi_flags |= HL_RAZWI_READ;7264if (is_write)7265razwi_flags |= HL_RAZWI_WRITE;72667267if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {7268if (engine_id[1] != HL_RAZWI_NA_ENG_ID)7269num_of_razwi_eng = 2;7270else7271num_of_razwi_eng = 1;7272}72737274if (razwi_flags)7275hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,7276razwi_flags, event_mask);7277}7278}72797280static void gaudi_print_out_of_sync_info(struct hl_device *hdev,7281struct cpucp_pkt_sync_err *sync_err)7282{7283struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];72847285dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",7286le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));7287}72887289static void gaudi_print_fw_alive_info(struct hl_device *hdev,7290struct hl_eq_fw_alive *fw_alive)7291{7292dev_err(hdev->dev,7293"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",7294(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",7295le32_to_cpu(fw_alive->process_id),7296le32_to_cpu(fw_alive->thread_id),7297le64_to_cpu(fw_alive->uptime_seconds));7298}72997300static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,7301void *data)7302{7303char desc[64] = "", *type;7304struct eq_nic_sei_event *eq_nic_sei = data;7305u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;73067307switch (eq_nic_sei->axi_error_cause) {7308case RXB:7309type = "RXB";7310break;7311case RXE:7312type = "RXE";7313break;7314case TXS:7315type = "TXS";7316break;7317case TXE:7318type = "TXE";7319break;7320case QPC_RESP:7321type = "QPC_RESP";7322break;7323case NON_AXI_ERR:7324type = "NON_AXI_ERR";7325break;7326case TMR:7327type = "TMR";7328break;7329default:7330dev_err(hdev->dev, "unknown NIC AXI cause %d\n",7331eq_nic_sei->axi_error_cause);7332type = "N/A";7333break;7334}73357336snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,7337eq_nic_sei->id);7338dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",7339event_type, desc);7340}73417342static int gaudi_compute_reset_late_init(struct hl_device *hdev)7343{7344/* GAUDI doesn't support any reset except hard-reset */7345return -EPERM;7346}73477348static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,7349struct hl_eq_hbm_ecc_data *hbm_ecc_data)7350{7351u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;7352int rc = 0;73537354if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &7355CPU_BOOT_DEV_STS0_HBM_ECC_EN) {7356if (!hbm_ecc_data) {7357dev_err(hdev->dev, "No FW ECC data");7358return 0;7359}73607361wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,7362le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7363rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,7364le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7365ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,7366le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7367derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,7368le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7369serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,7370le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7371type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,7372le32_to_cpu(hbm_ecc_data->hbm_ecc_info));7373ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,7374le32_to_cpu(hbm_ecc_data->hbm_ecc_info));73757376dev_err(hdev->dev,7377"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",7378device, ch, wr_par, rd_par, ca_par, serr, derr);7379dev_err(hdev->dev,7380"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",7381device, ch, hbm_ecc_data->first_addr, type,7382hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,7383hbm_ecc_data->dec_cnt);7384return 0;7385}73867387if (hdev->asic_prop.fw_security_enabled) {7388dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");7389return 0;7390}73917392base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;7393for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {7394val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);7395val = (val & 0xFF) | ((val >> 8) & 0xFF);7396if (val) {7397rc = -EIO;7398dev_err(hdev->dev,7399"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",7400device, ch * 2, val & 0x1, (val >> 1) & 0x1,7401(val >> 2) & 0x1, (val >> 3) & 0x1,7402(val >> 4) & 0x1);74037404val2 = RREG32(base + ch * 0x1000 + 0x060);7405dev_err(hdev->dev,7406"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",7407device, ch * 2,7408RREG32(base + ch * 0x1000 + 0x064),7409(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,7410(val2 & 0xFF0000) >> 16,7411(val2 & 0xFF000000) >> 24);7412}74137414val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);7415val = (val & 0xFF) | ((val >> 8) & 0xFF);7416if (val) {7417rc = -EIO;7418dev_err(hdev->dev,7419"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",7420device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,7421(val >> 2) & 0x1, (val >> 3) & 0x1,7422(val >> 4) & 0x1);74237424val2 = RREG32(base + ch * 0x1000 + 0x070);7425dev_err(hdev->dev,7426"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",7427device, ch * 2 + 1,7428RREG32(base + ch * 0x1000 + 0x074),7429(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,7430(val2 & 0xFF0000) >> 16,7431(val2 & 0xFF000000) >> 24);7432}74337434/* Clear interrupts */7435RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);7436RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);7437WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);7438WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);7439RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);7440RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);7441}74427443val = RREG32(base + 0x8F30);7444val2 = RREG32(base + 0x8F34);7445if (val | val2) {7446rc = -EIO;7447dev_err(hdev->dev,7448"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",7449device, val, val2);7450}7451val = RREG32(base + 0x8F40);7452val2 = RREG32(base + 0x8F44);7453if (val | val2) {7454rc = -EIO;7455dev_err(hdev->dev,7456"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",7457device, val, val2);7458}74597460return rc;7461}74627463static int gaudi_hbm_event_to_dev(u16 hbm_event_type)7464{7465switch (hbm_event_type) {7466case GAUDI_EVENT_HBM0_SPI_0:7467case GAUDI_EVENT_HBM0_SPI_1:7468return 0;7469case GAUDI_EVENT_HBM1_SPI_0:7470case GAUDI_EVENT_HBM1_SPI_1:7471return 1;7472case GAUDI_EVENT_HBM2_SPI_0:7473case GAUDI_EVENT_HBM2_SPI_1:7474return 2;7475case GAUDI_EVENT_HBM3_SPI_0:7476case GAUDI_EVENT_HBM3_SPI_1:7477return 3;7478default:7479break;7480}74817482/* Should never happen */7483return 0;7484}74857486static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,7487char *interrupt_name)7488{7489u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;7490bool soft_reset_required = false;74917492tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &7493TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;74947495for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)7496if (tpc_interrupts_cause & BIT(i)) {7497dev_err_ratelimited(hdev->dev,7498"TPC%d_%s interrupt cause: %s\n",7499tpc_id, interrupt_name,7500gaudi_tpc_interrupts_cause[i]);7501/* If this is QM error, we need to soft-reset */7502if (i == 15)7503soft_reset_required = true;7504}75057506/* Clear interrupts */7507WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);75087509return soft_reset_required;7510}75117512static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)7513{7514return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;7515}75167517static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)7518{7519return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;7520}75217522static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)7523{7524ktime_t zero_time = ktime_set(0, 0);75257526mutex_lock(&hdev->clk_throttling.lock);75277528switch (event_type) {7529case GAUDI_EVENT_FIX_POWER_ENV_S:7530hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;7531hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;7532hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();7533hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;7534dev_info_ratelimited(hdev->dev,7535"Clock throttling due to power consumption\n");7536break;75377538case GAUDI_EVENT_FIX_POWER_ENV_E:7539hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;7540hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();7541dev_info_ratelimited(hdev->dev,7542"Power envelop is safe, back to optimal clock\n");7543break;75447545case GAUDI_EVENT_FIX_THERMAL_ENV_S:7546hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;7547hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;7548hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();7549hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;7550*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7551dev_info_ratelimited(hdev->dev,7552"Clock throttling due to overheating\n");7553break;75547555case GAUDI_EVENT_FIX_THERMAL_ENV_E:7556hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;7557hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();7558*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7559dev_info_ratelimited(hdev->dev,7560"Thermal envelop is safe, back to optimal clock\n");7561break;75627563default:7564dev_err(hdev->dev, "Received invalid clock change event %d\n",7565event_type);7566break;7567}75687569mutex_unlock(&hdev->clk_throttling.lock);7570}75717572static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)7573{7574struct gaudi_device *gaudi = hdev->asic_specific;7575struct hl_info_fw_err_info fw_err_info;7576u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;7577u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);7578u32 fw_fatal_err_flag = 0, flags = 0;7579u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)7580>> EQ_CTL_EVENT_TYPE_SHIFT);7581bool reset_required, reset_direct = false;7582u8 cause;7583int rc;75847585if (event_type >= GAUDI_EVENT_SIZE) {7586dev_err(hdev->dev, "Event type %u exceeds maximum of %u",7587event_type, GAUDI_EVENT_SIZE - 1);7588return;7589}75907591gaudi->events_stat[event_type]++;7592gaudi->events_stat_aggregate[event_type]++;75937594switch (event_type) {7595case GAUDI_EVENT_PCIE_CORE_DERR:7596case GAUDI_EVENT_PCIE_IF_DERR:7597case GAUDI_EVENT_PCIE_PHY_DERR:7598case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:7599case GAUDI_EVENT_MME0_ACC_DERR:7600case GAUDI_EVENT_MME0_SBAB_DERR:7601case GAUDI_EVENT_MME1_ACC_DERR:7602case GAUDI_EVENT_MME1_SBAB_DERR:7603case GAUDI_EVENT_MME2_ACC_DERR:7604case GAUDI_EVENT_MME2_SBAB_DERR:7605case GAUDI_EVENT_MME3_ACC_DERR:7606case GAUDI_EVENT_MME3_SBAB_DERR:7607case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:7608fallthrough;7609case GAUDI_EVENT_CPU_IF_ECC_DERR:7610case GAUDI_EVENT_PSOC_MEM_DERR:7611case GAUDI_EVENT_PSOC_CORESIGHT_DERR:7612case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:7613case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:7614case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:7615case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:7616case GAUDI_EVENT_MMU_DERR:7617case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:7618gaudi_print_irq_info(hdev, event_type, true, &event_mask);7619gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);7620event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7621fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;7622goto reset_device;76237624case GAUDI_EVENT_GIC500:7625case GAUDI_EVENT_AXI_ECC:7626case GAUDI_EVENT_L2_RAM_ECC:7627case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:7628gaudi_print_irq_info(hdev, event_type, false, &event_mask);7629fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;7630event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7631goto reset_device;76327633case GAUDI_EVENT_HBM0_SPI_0:7634case GAUDI_EVENT_HBM1_SPI_0:7635case GAUDI_EVENT_HBM2_SPI_0:7636case GAUDI_EVENT_HBM3_SPI_0:7637gaudi_print_irq_info(hdev, event_type, false, &event_mask);7638gaudi_hbm_read_interrupts(hdev,7639gaudi_hbm_event_to_dev(event_type),7640&eq_entry->hbm_ecc_data);7641fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;7642event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7643goto reset_device;76447645case GAUDI_EVENT_HBM0_SPI_1:7646case GAUDI_EVENT_HBM1_SPI_1:7647case GAUDI_EVENT_HBM2_SPI_1:7648case GAUDI_EVENT_HBM3_SPI_1:7649gaudi_print_irq_info(hdev, event_type, false, &event_mask);7650gaudi_hbm_read_interrupts(hdev,7651gaudi_hbm_event_to_dev(event_type),7652&eq_entry->hbm_ecc_data);7653hl_fw_unmask_irq(hdev, event_type);7654event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7655break;76567657case GAUDI_EVENT_TPC0_DEC:7658case GAUDI_EVENT_TPC1_DEC:7659case GAUDI_EVENT_TPC2_DEC:7660case GAUDI_EVENT_TPC3_DEC:7661case GAUDI_EVENT_TPC4_DEC:7662case GAUDI_EVENT_TPC5_DEC:7663case GAUDI_EVENT_TPC6_DEC:7664case GAUDI_EVENT_TPC7_DEC:7665/* In TPC DEC event, notify on TPC assertion. While there isn't7666* a specific event for assertion yet, the FW generates TPC DEC event.7667* The SW upper layer will inspect an internal mapped area to indicate7668* if the event is a TPC Assertion or a "real" TPC DEC.7669*/7670event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;7671gaudi_print_irq_info(hdev, event_type, true, &event_mask);7672reset_required = gaudi_tpc_read_interrupts(hdev,7673tpc_dec_event_to_tpc_id(event_type),7674"AXI_SLV_DEC_Error");7675event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7676if (reset_required) {7677dev_err(hdev->dev, "reset required due to %s\n",7678gaudi_irq_map_table[event_type].name);76797680reset_direct = true;7681goto reset_device;7682} else {7683hl_fw_unmask_irq(hdev, event_type);7684event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;7685}7686break;76877688case GAUDI_EVENT_TPC0_KRN_ERR:7689case GAUDI_EVENT_TPC1_KRN_ERR:7690case GAUDI_EVENT_TPC2_KRN_ERR:7691case GAUDI_EVENT_TPC3_KRN_ERR:7692case GAUDI_EVENT_TPC4_KRN_ERR:7693case GAUDI_EVENT_TPC5_KRN_ERR:7694case GAUDI_EVENT_TPC6_KRN_ERR:7695case GAUDI_EVENT_TPC7_KRN_ERR:7696gaudi_print_irq_info(hdev, event_type, true, &event_mask);7697reset_required = gaudi_tpc_read_interrupts(hdev,7698tpc_krn_event_to_tpc_id(event_type),7699"KRN_ERR");7700event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7701if (reset_required) {7702dev_err(hdev->dev, "reset required due to %s\n",7703gaudi_irq_map_table[event_type].name);77047705reset_direct = true;7706goto reset_device;7707} else {7708hl_fw_unmask_irq(hdev, event_type);7709event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;7710}7711break;77127713case GAUDI_EVENT_PCIE_CORE_SERR:7714case GAUDI_EVENT_PCIE_IF_SERR:7715case GAUDI_EVENT_PCIE_PHY_SERR:7716case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:7717case GAUDI_EVENT_MME0_ACC_SERR:7718case GAUDI_EVENT_MME0_SBAB_SERR:7719case GAUDI_EVENT_MME1_ACC_SERR:7720case GAUDI_EVENT_MME1_SBAB_SERR:7721case GAUDI_EVENT_MME2_ACC_SERR:7722case GAUDI_EVENT_MME2_SBAB_SERR:7723case GAUDI_EVENT_MME3_ACC_SERR:7724case GAUDI_EVENT_MME3_SBAB_SERR:7725case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:7726case GAUDI_EVENT_CPU_IF_ECC_SERR:7727case GAUDI_EVENT_PSOC_MEM_SERR:7728case GAUDI_EVENT_PSOC_CORESIGHT_SERR:7729case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:7730case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:7731case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:7732case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:7733fallthrough;7734case GAUDI_EVENT_MMU_SERR:7735gaudi_print_irq_info(hdev, event_type, true, &event_mask);7736gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);7737hl_fw_unmask_irq(hdev, event_type);7738event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7739break;77407741case GAUDI_EVENT_PCIE_DEC:7742case GAUDI_EVENT_CPU_AXI_SPLITTER:7743case GAUDI_EVENT_PSOC_AXI_DEC:7744case GAUDI_EVENT_PSOC_PRSTN_FALL:7745gaudi_print_irq_info(hdev, event_type, true, &event_mask);7746hl_fw_unmask_irq(hdev, event_type);7747event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7748break;77497750case GAUDI_EVENT_MMU_PAGE_FAULT:7751case GAUDI_EVENT_MMU_WR_PERM:7752gaudi_print_irq_info(hdev, event_type, true, &event_mask);7753hl_fw_unmask_irq(hdev, event_type);7754event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7755break;77567757case GAUDI_EVENT_MME0_WBC_RSP:7758case GAUDI_EVENT_MME0_SBAB0_RSP:7759case GAUDI_EVENT_MME1_WBC_RSP:7760case GAUDI_EVENT_MME1_SBAB0_RSP:7761case GAUDI_EVENT_MME2_WBC_RSP:7762case GAUDI_EVENT_MME2_SBAB0_RSP:7763case GAUDI_EVENT_MME3_WBC_RSP:7764case GAUDI_EVENT_MME3_SBAB0_RSP:7765case GAUDI_EVENT_RAZWI_OR_ADC:7766case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:7767case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:7768fallthrough;7769case GAUDI_EVENT_NIC0_QM0:7770case GAUDI_EVENT_NIC0_QM1:7771case GAUDI_EVENT_NIC1_QM0:7772case GAUDI_EVENT_NIC1_QM1:7773case GAUDI_EVENT_NIC2_QM0:7774case GAUDI_EVENT_NIC2_QM1:7775case GAUDI_EVENT_NIC3_QM0:7776case GAUDI_EVENT_NIC3_QM1:7777case GAUDI_EVENT_NIC4_QM0:7778case GAUDI_EVENT_NIC4_QM1:7779case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:7780case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:7781gaudi_print_irq_info(hdev, event_type, true, &event_mask);7782gaudi_handle_qman_err(hdev, event_type, &event_mask);7783hl_fw_unmask_irq(hdev, event_type);7784event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);7785break;77867787case GAUDI_EVENT_RAZWI_OR_ADC_SW:7788gaudi_print_irq_info(hdev, event_type, true, &event_mask);7789event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7790goto reset_device;77917792case GAUDI_EVENT_TPC0_BMON_SPMU:7793case GAUDI_EVENT_TPC1_BMON_SPMU:7794case GAUDI_EVENT_TPC2_BMON_SPMU:7795case GAUDI_EVENT_TPC3_BMON_SPMU:7796case GAUDI_EVENT_TPC4_BMON_SPMU:7797case GAUDI_EVENT_TPC5_BMON_SPMU:7798case GAUDI_EVENT_TPC6_BMON_SPMU:7799case GAUDI_EVENT_TPC7_BMON_SPMU:7800case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:7801gaudi_print_irq_info(hdev, event_type, false, &event_mask);7802hl_fw_unmask_irq(hdev, event_type);7803event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7804break;78057806case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:7807gaudi_print_nic_axi_irq_info(hdev, event_type, &data);7808hl_fw_unmask_irq(hdev, event_type);7809event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7810break;78117812case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:7813gaudi_print_irq_info(hdev, event_type, false, &event_mask);7814gaudi_print_sm_sei_info(hdev, event_type,7815&eq_entry->sm_sei_data);7816rc = hl_state_dump(hdev);7817event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7818if (rc)7819dev_err(hdev->dev,7820"Error during system state dump %d\n", rc);7821hl_fw_unmask_irq(hdev, event_type);7822break;78237824case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:7825break;78267827case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:7828gaudi_print_clk_change_info(hdev, event_type, &event_mask);7829hl_fw_unmask_irq(hdev, event_type);7830break;78317832case GAUDI_EVENT_PSOC_GPIO_U16_0:7833cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;7834dev_err(hdev->dev,7835"Received high temp H/W interrupt %d (cause %d)\n",7836event_type, cause);7837event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;7838break;78397840case GAUDI_EVENT_DEV_RESET_REQ:7841gaudi_print_irq_info(hdev, event_type, false, &event_mask);7842event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7843goto reset_device;78447845case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:7846gaudi_print_irq_info(hdev, event_type, false, &event_mask);7847gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);7848event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;7849goto reset_device;78507851case GAUDI_EVENT_FW_ALIVE_S:7852gaudi_print_irq_info(hdev, event_type, false, &event_mask);7853gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);7854fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;7855fw_err_info.event_id = event_type;7856fw_err_info.event_mask = &event_mask;7857hl_handle_fw_err(hdev, &fw_err_info);7858goto reset_device;78597860default:7861dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",7862event_type);7863break;7864}78657866if (event_mask)7867hl_notifier_event_send_all(hdev, event_mask);78687869return;78707871reset_device:7872reset_required = true;78737874if (hdev->asic_prop.fw_security_enabled && !reset_direct) {7875flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;78767877/* notify on device unavailable while the reset triggered by fw */7878event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |7879HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);7880} else if (hdev->hard_reset_on_fw_events) {7881flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;7882event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;7883} else {7884reset_required = false;7885}78867887if (reset_required) {7888/* escalate general hw errors to critical/fatal error */7889if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)7890hl_handle_critical_hw_err(hdev, event_type, &event_mask);78917892hl_device_cond_reset(hdev, flags, event_mask);7893} else {7894hl_fw_unmask_irq(hdev, event_type);7895/* Notification on occurred event needs to be sent although reset is not executed */7896if (event_mask)7897hl_notifier_event_send_all(hdev, event_mask);7898}7899}79007901static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)7902{7903struct gaudi_device *gaudi = hdev->asic_specific;79047905if (aggregate) {7906*size = (u32) sizeof(gaudi->events_stat_aggregate);7907return gaudi->events_stat_aggregate;7908}79097910*size = (u32) sizeof(gaudi->events_stat);7911return gaudi->events_stat;7912}79137914static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)7915{7916struct gaudi_device *gaudi = hdev->asic_specific;7917u32 status, timeout_usec;7918int rc;79197920if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||7921hdev->reset_info.hard_reset_pending)7922return 0;79237924if (hdev->pldm)7925timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;7926else7927timeout_usec = MMU_CONFIG_TIMEOUT_USEC;79287929/* L0 & L1 invalidation */7930WREG32(mmSTLB_INV_PS, 3);7931WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);7932WREG32(mmSTLB_INV_PS, 2);79337934rc = hl_poll_timeout(7935hdev,7936mmSTLB_INV_PS,7937status,7938!status,79391000,7940timeout_usec);79417942WREG32(mmSTLB_INV_SET, 0);79437944return rc;7945}79467947static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,7948bool is_hard, u32 flags,7949u32 asid, u64 va, u64 size)7950{7951/* Treat as invalidate all because there is no range invalidation7952* in Gaudi7953*/7954return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);7955}79567957static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)7958{7959u32 status, timeout_usec;7960int rc;79617962if (hdev->pldm)7963timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;7964else7965timeout_usec = MMU_CONFIG_TIMEOUT_USEC;79667967WREG32(MMU_ASID, asid);7968WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);7969WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);7970WREG32(MMU_BUSY, 0x80000000);79717972rc = hl_poll_timeout(7973hdev,7974MMU_BUSY,7975status,7976!(status & 0x80000000),79771000,7978timeout_usec);79797980if (rc) {7981dev_err(hdev->dev,7982"Timeout during MMU hop0 config of asid %d\n", asid);7983return rc;7984}79857986return 0;7987}79887989static int gaudi_send_heartbeat(struct hl_device *hdev)7990{7991struct gaudi_device *gaudi = hdev->asic_specific;79927993if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))7994return 0;79957996return hl_fw_send_heartbeat(hdev);7997}79987999static int gaudi_cpucp_info_get(struct hl_device *hdev)8000{8001struct gaudi_device *gaudi = hdev->asic_specific;8002struct asic_fixed_properties *prop = &hdev->asic_prop;8003int rc;80048005if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))8006return 0;80078008rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,8009mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,8010mmCPU_BOOT_ERR1);8011if (rc)8012return rc;80138014if (!strlen(prop->cpucp_info.card_name))8015strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,8016CARD_NAME_MAX_LEN);80178018hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);80198020set_default_power_values(hdev);80218022return 0;8023}80248025static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,8026struct engines_data *e)8027{8028struct gaudi_device *gaudi = hdev->asic_specific;8029const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";8030const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";8031const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";8032unsigned long *mask = (unsigned long *)mask_arr;8033u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;8034bool is_idle = true, is_eng_idle, is_slave;8035u64 offset;8036int i, dma_id, port;80378038if (e)8039hl_engine_data_sprintf(e,8040"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"8041"--- ------- ------------ ---------- -------------\n");80428043for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {8044dma_id = gaudi_dma_assignment[i];8045offset = dma_id * DMA_QMAN_OFFSET;80468047qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);8048qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);8049dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);8050is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&8051IS_DMA_IDLE(dma_core_sts0);8052is_idle &= is_eng_idle;80538054if (mask && !is_eng_idle)8055set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);8056if (e)8057hl_engine_data_sprintf(e, fmt, dma_id,8058is_eng_idle ? "Y" : "N", qm_glbl_sts0,8059qm_cgm_sts, dma_core_sts0);8060}80618062if (e)8063hl_engine_data_sprintf(e,8064"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"8065"--- ------- ------------ ---------- ----------\n");80668067for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {8068offset = i * TPC_QMAN_OFFSET;8069qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);8070qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);8071tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);8072is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&8073IS_TPC_IDLE(tpc_cfg_sts);8074is_idle &= is_eng_idle;80758076if (mask && !is_eng_idle)8077set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);8078if (e)8079hl_engine_data_sprintf(e, fmt, i,8080is_eng_idle ? "Y" : "N",8081qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);8082}80838084if (e)8085hl_engine_data_sprintf(e,8086"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"8087"--- ------- ------------ ---------- -----------\n");80888089for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {8090offset = i * MME_QMAN_OFFSET;8091mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);8092is_eng_idle = IS_MME_IDLE(mme_arch_sts);80938094/* MME 1 & 3 are slaves, no need to check their QMANs */8095is_slave = i % 2;8096if (!is_slave) {8097qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);8098qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);8099is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);8100}81018102is_idle &= is_eng_idle;81038104if (mask && !is_eng_idle)8105set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);8106if (e) {8107if (!is_slave)8108hl_engine_data_sprintf(e, fmt, i,8109is_eng_idle ? "Y" : "N",8110qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);8111else8112hl_engine_data_sprintf(e, mme_slave_fmt, i,8113is_eng_idle ? "Y" : "N", "-",8114"-", mme_arch_sts);8115}8116}81178118if (e)8119hl_engine_data_sprintf(e,8120"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"8121"--- ------- ------------ ----------\n");81228123for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {8124offset = i * NIC_MACRO_QMAN_OFFSET;8125port = 2 * i;8126if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {8127qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);8128qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);8129is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);8130is_idle &= is_eng_idle;81318132if (mask && !is_eng_idle)8133set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);8134if (e)8135hl_engine_data_sprintf(e, nic_fmt, port,8136is_eng_idle ? "Y" : "N",8137qm_glbl_sts0, qm_cgm_sts);8138}81398140port = 2 * i + 1;8141if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {8142qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);8143qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);8144is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);8145is_idle &= is_eng_idle;81468147if (mask && !is_eng_idle)8148set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);8149if (e)8150hl_engine_data_sprintf(e, nic_fmt, port,8151is_eng_idle ? "Y" : "N",8152qm_glbl_sts0, qm_cgm_sts);8153}8154}81558156if (e)8157hl_engine_data_sprintf(e, "\n");81588159return is_idle;8160}81618162static void gaudi_hw_queues_lock(struct hl_device *hdev)8163__acquires(&gaudi->hw_queues_lock)8164{8165struct gaudi_device *gaudi = hdev->asic_specific;81668167spin_lock(&gaudi->hw_queues_lock);8168}81698170static void gaudi_hw_queues_unlock(struct hl_device *hdev)8171__releases(&gaudi->hw_queues_lock)8172{8173struct gaudi_device *gaudi = hdev->asic_specific;81748175spin_unlock(&gaudi->hw_queues_lock);8176}81778178static u32 gaudi_get_pci_id(struct hl_device *hdev)8179{8180return hdev->pdev->device;8181}81828183static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,8184size_t max_size)8185{8186struct gaudi_device *gaudi = hdev->asic_specific;81878188if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))8189return 0;81908191return hl_fw_get_eeprom_data(hdev, data, max_size);8192}81938194static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)8195{8196struct gaudi_device *gaudi = hdev->asic_specific;81978198if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))8199return 0;82008201return hl_fw_get_monitor_dump(hdev, data);8202}82038204/*8205* this function should be used only during initialization and/or after reset,8206* when there are no active users.8207*/8208static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)8209{8210u64 kernel_timeout;8211u32 status, offset;8212int rc;82138214offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);82158216if (hdev->pldm)8217kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;8218else8219kernel_timeout = HL_DEVICE_TIMEOUT_USEC;82208221WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,8222lower_32_bits(tpc_kernel));8223WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,8224upper_32_bits(tpc_kernel));82258226WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,8227lower_32_bits(tpc_kernel));8228WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,8229upper_32_bits(tpc_kernel));8230/* set a valid LUT pointer, content is of no significance */8231WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,8232lower_32_bits(tpc_kernel));8233WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,8234upper_32_bits(tpc_kernel));82358236WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,8237lower_32_bits(CFG_BASE +8238mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));82398240WREG32(mmTPC0_CFG_TPC_CMD + offset,8241(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |82421 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));8243/* wait a bit for the engine to start executing */8244usleep_range(1000, 1500);82458246/* wait until engine has finished executing */8247rc = hl_poll_timeout(8248hdev,8249mmTPC0_CFG_STATUS + offset,8250status,8251(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==8252TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,82531000,8254kernel_timeout);82558256if (rc) {8257dev_err(hdev->dev,8258"Timeout while waiting for TPC%d icache prefetch\n",8259tpc_id);8260return -EIO;8261}82628263WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,82641 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);82658266/* wait a bit for the engine to start executing */8267usleep_range(1000, 1500);82688269/* wait until engine has finished executing */8270rc = hl_poll_timeout(8271hdev,8272mmTPC0_CFG_STATUS + offset,8273status,8274(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==8275TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,82761000,8277kernel_timeout);82788279if (rc) {8280dev_err(hdev->dev,8281"Timeout while waiting for TPC%d vector pipe\n",8282tpc_id);8283return -EIO;8284}82858286rc = hl_poll_timeout(8287hdev,8288mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,8289status,8290(status == 0),82911000,8292kernel_timeout);82938294if (rc) {8295dev_err(hdev->dev,8296"Timeout while waiting for TPC%d kernel to execute\n",8297tpc_id);8298return -EIO;8299}83008301return 0;8302}83038304static int gaudi_internal_cb_pool_init(struct hl_device *hdev,8305struct hl_ctx *ctx)8306{8307struct gaudi_device *gaudi = hdev->asic_specific;8308int min_alloc_order, rc, collective_cb_size;83098310if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))8311return 0;83128313hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,8314HOST_SPACE_INTERNAL_CB_SZ,8315&hdev->internal_cb_pool_dma_addr,8316GFP_KERNEL | __GFP_ZERO);83178318if (!hdev->internal_cb_pool_virt_addr)8319return -ENOMEM;83208321collective_cb_size = sizeof(struct packet_msg_short) * 5 +8322sizeof(struct packet_fence);8323min_alloc_order = ilog2(collective_cb_size);83248325hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);8326if (!hdev->internal_cb_pool) {8327dev_err(hdev->dev,8328"Failed to create internal CB pool\n");8329rc = -ENOMEM;8330goto free_internal_cb_pool;8331}83328333rc = gen_pool_add(hdev->internal_cb_pool,8334(uintptr_t) hdev->internal_cb_pool_virt_addr,8335HOST_SPACE_INTERNAL_CB_SZ, -1);8336if (rc) {8337dev_err(hdev->dev,8338"Failed to add memory to internal CB pool\n");8339rc = -EFAULT;8340goto destroy_internal_cb_pool;8341}83428343hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,8344HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,8345HL_MMU_VA_ALIGNMENT_NOT_NEEDED);83468347if (!hdev->internal_cb_va_base) {8348rc = -ENOMEM;8349goto destroy_internal_cb_pool;8350}83518352mutex_lock(&hdev->mmu_lock);83538354rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,8355hdev->internal_cb_pool_dma_addr,8356HOST_SPACE_INTERNAL_CB_SZ);8357if (rc)8358goto unreserve_internal_cb_pool;83598360rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);8361if (rc)8362goto unmap_internal_cb_pool;83638364mutex_unlock(&hdev->mmu_lock);83658366return 0;83678368unmap_internal_cb_pool:8369hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,8370HOST_SPACE_INTERNAL_CB_SZ);8371unreserve_internal_cb_pool:8372mutex_unlock(&hdev->mmu_lock);8373hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,8374HOST_SPACE_INTERNAL_CB_SZ);8375destroy_internal_cb_pool:8376gen_pool_destroy(hdev->internal_cb_pool);8377free_internal_cb_pool:8378hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,8379hdev->internal_cb_pool_dma_addr);83808381return rc;8382}83838384static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,8385struct hl_ctx *ctx)8386{8387struct gaudi_device *gaudi = hdev->asic_specific;83888389if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))8390return;83918392mutex_lock(&hdev->mmu_lock);8393hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,8394HOST_SPACE_INTERNAL_CB_SZ);8395hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,8396HOST_SPACE_INTERNAL_CB_SZ);8397hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);8398mutex_unlock(&hdev->mmu_lock);83998400gen_pool_destroy(hdev->internal_cb_pool);84018402hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,8403hdev->internal_cb_pool_dma_addr);8404}84058406static int gaudi_ctx_init(struct hl_ctx *ctx)8407{8408int rc;84098410if (ctx->asid == HL_KERNEL_ASID_ID)8411return 0;84128413rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);8414if (rc)8415return rc;84168417rc = gaudi_restore_user_registers(ctx->hdev);8418if (rc)8419gaudi_internal_cb_pool_fini(ctx->hdev, ctx);84208421return rc;8422}84238424static void gaudi_ctx_fini(struct hl_ctx *ctx)8425{8426if (ctx->asid == HL_KERNEL_ASID_ID)8427return;84288429gaudi_internal_cb_pool_fini(ctx->hdev, ctx);8430}84318432static int gaudi_pre_schedule_cs(struct hl_cs *cs)8433{8434return 0;8435}84368437static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)8438{8439return gaudi_cq_assignment[cq_idx];8440}84418442static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)8443{8444return sizeof(struct packet_msg_short) +8445sizeof(struct packet_msg_prot) * 2;8446}84478448static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)8449{8450return sizeof(struct packet_msg_short) * 4 +8451sizeof(struct packet_fence) +8452sizeof(struct packet_msg_prot) * 2;8453}84548455static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)8456{8457return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);8458}84598460static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,8461u32 size, bool eb)8462{8463struct hl_cb *cb = (struct hl_cb *) data;8464struct packet_msg_short *pkt;8465u32 value, ctl, pkt_size = sizeof(*pkt);84668467pkt = cb->kernel_address + size;8468memset(pkt, 0, pkt_size);84698470/* Inc by 1, Mode ADD */8471value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);8472value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);84738474ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);8475ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */8476ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */8477ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);8478ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);8479ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);8480ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);84818482pkt->value = cpu_to_le32(value);8483pkt->ctl = cpu_to_le32(ctl);84848485return size + pkt_size;8486}84878488static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,8489u16 addr)8490{8491u32 ctl, pkt_size = sizeof(*pkt);84928493memset(pkt, 0, pkt_size);84948495ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);8496ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */8497ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);8498ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);8499ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);8500ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */85018502pkt->value = cpu_to_le32(value);8503pkt->ctl = cpu_to_le32(ctl);85048505return pkt_size;8506}85078508static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,8509struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,8510u16 sob_val, u16 mon_id)8511{8512u64 monitor_base;8513u32 ctl, value, pkt_size = sizeof(*pkt);8514u16 msg_addr_offset;8515u8 mask;85168517if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {8518dev_err(hdev->dev,8519"sob_base %u (mask %#x) is not valid\n",8520sob_base, sob_mask);8521return 0;8522}85238524/*8525* monitor_base should be the content of the base0 address registers,8526* so it will be added to the msg short offsets8527*/8528monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;85298530msg_addr_offset =8531(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -8532monitor_base;85338534memset(pkt, 0, pkt_size);85358536/* Monitor config packet: bind the monitor to a sync object */8537value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);8538value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);8539value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,85400); /* GREATER OR EQUAL*/8541value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);85428543ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);8544ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */8545ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */8546ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);8547ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);8548ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);8549ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);85508551pkt->value = cpu_to_le32(value);8552pkt->ctl = cpu_to_le32(ctl);85538554return pkt_size;8555}85568557static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)8558{8559u32 ctl, cfg, pkt_size = sizeof(*pkt);85608561memset(pkt, 0, pkt_size);85628563cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);8564cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);8565cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);85668567ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);8568ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);8569ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);8570ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);85718572pkt->cfg = cpu_to_le32(cfg);8573pkt->ctl = cpu_to_le32(ctl);85748575return pkt_size;8576}85778578static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)8579{8580u32 offset, nic_index;85818582switch (queue_id) {8583case GAUDI_QUEUE_ID_DMA_0_0:8584offset = mmDMA0_QM_CP_FENCE2_RDATA_0;8585break;8586case GAUDI_QUEUE_ID_DMA_0_1:8587offset = mmDMA0_QM_CP_FENCE2_RDATA_1;8588break;8589case GAUDI_QUEUE_ID_DMA_0_2:8590offset = mmDMA0_QM_CP_FENCE2_RDATA_2;8591break;8592case GAUDI_QUEUE_ID_DMA_0_3:8593offset = mmDMA0_QM_CP_FENCE2_RDATA_3;8594break;8595case GAUDI_QUEUE_ID_DMA_1_0:8596offset = mmDMA1_QM_CP_FENCE2_RDATA_0;8597break;8598case GAUDI_QUEUE_ID_DMA_1_1:8599offset = mmDMA1_QM_CP_FENCE2_RDATA_1;8600break;8601case GAUDI_QUEUE_ID_DMA_1_2:8602offset = mmDMA1_QM_CP_FENCE2_RDATA_2;8603break;8604case GAUDI_QUEUE_ID_DMA_1_3:8605offset = mmDMA1_QM_CP_FENCE2_RDATA_3;8606break;8607case GAUDI_QUEUE_ID_DMA_5_0:8608offset = mmDMA5_QM_CP_FENCE2_RDATA_0;8609break;8610case GAUDI_QUEUE_ID_DMA_5_1:8611offset = mmDMA5_QM_CP_FENCE2_RDATA_1;8612break;8613case GAUDI_QUEUE_ID_DMA_5_2:8614offset = mmDMA5_QM_CP_FENCE2_RDATA_2;8615break;8616case GAUDI_QUEUE_ID_DMA_5_3:8617offset = mmDMA5_QM_CP_FENCE2_RDATA_3;8618break;8619case GAUDI_QUEUE_ID_TPC_7_0:8620offset = mmTPC7_QM_CP_FENCE2_RDATA_0;8621break;8622case GAUDI_QUEUE_ID_TPC_7_1:8623offset = mmTPC7_QM_CP_FENCE2_RDATA_1;8624break;8625case GAUDI_QUEUE_ID_TPC_7_2:8626offset = mmTPC7_QM_CP_FENCE2_RDATA_2;8627break;8628case GAUDI_QUEUE_ID_TPC_7_3:8629offset = mmTPC7_QM_CP_FENCE2_RDATA_3;8630break;8631case GAUDI_QUEUE_ID_NIC_0_0:8632case GAUDI_QUEUE_ID_NIC_1_0:8633case GAUDI_QUEUE_ID_NIC_2_0:8634case GAUDI_QUEUE_ID_NIC_3_0:8635case GAUDI_QUEUE_ID_NIC_4_0:8636case GAUDI_QUEUE_ID_NIC_5_0:8637case GAUDI_QUEUE_ID_NIC_6_0:8638case GAUDI_QUEUE_ID_NIC_7_0:8639case GAUDI_QUEUE_ID_NIC_8_0:8640case GAUDI_QUEUE_ID_NIC_9_0:8641nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;8642offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +8643(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +8644(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;8645break;8646case GAUDI_QUEUE_ID_NIC_0_1:8647case GAUDI_QUEUE_ID_NIC_1_1:8648case GAUDI_QUEUE_ID_NIC_2_1:8649case GAUDI_QUEUE_ID_NIC_3_1:8650case GAUDI_QUEUE_ID_NIC_4_1:8651case GAUDI_QUEUE_ID_NIC_5_1:8652case GAUDI_QUEUE_ID_NIC_6_1:8653case GAUDI_QUEUE_ID_NIC_7_1:8654case GAUDI_QUEUE_ID_NIC_8_1:8655case GAUDI_QUEUE_ID_NIC_9_1:8656nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;8657offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +8658(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +8659(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;8660break;8661case GAUDI_QUEUE_ID_NIC_0_2:8662case GAUDI_QUEUE_ID_NIC_1_2:8663case GAUDI_QUEUE_ID_NIC_2_2:8664case GAUDI_QUEUE_ID_NIC_3_2:8665case GAUDI_QUEUE_ID_NIC_4_2:8666case GAUDI_QUEUE_ID_NIC_5_2:8667case GAUDI_QUEUE_ID_NIC_6_2:8668case GAUDI_QUEUE_ID_NIC_7_2:8669case GAUDI_QUEUE_ID_NIC_8_2:8670case GAUDI_QUEUE_ID_NIC_9_2:8671nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;8672offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +8673(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +8674(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;8675break;8676case GAUDI_QUEUE_ID_NIC_0_3:8677case GAUDI_QUEUE_ID_NIC_1_3:8678case GAUDI_QUEUE_ID_NIC_2_3:8679case GAUDI_QUEUE_ID_NIC_3_3:8680case GAUDI_QUEUE_ID_NIC_4_3:8681case GAUDI_QUEUE_ID_NIC_5_3:8682case GAUDI_QUEUE_ID_NIC_6_3:8683case GAUDI_QUEUE_ID_NIC_7_3:8684case GAUDI_QUEUE_ID_NIC_8_3:8685case GAUDI_QUEUE_ID_NIC_9_3:8686nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;8687offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +8688(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +8689(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;8690break;8691default:8692return -EINVAL;8693}86948695*addr = CFG_BASE + offset;86968697return 0;8698}86998700static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)8701{8702u64 monitor_base;8703u32 size = 0;8704u16 msg_addr_offset;87058706/*8707* monitor_base should be the content of the base0 address registers,8708* so it will be added to the msg short offsets8709*/8710monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;87118712/* First monitor config packet: low address of the sync */8713msg_addr_offset =8714(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -8715monitor_base;87168717size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,8718msg_addr_offset);87198720/* Second monitor config packet: high address of the sync */8721msg_addr_offset =8722(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -8723monitor_base;87248725size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),8726msg_addr_offset);87278728/*8729* Third monitor config packet: the payload, i.e. what to write when the8730* sync triggers8731*/8732msg_addr_offset =8733(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -8734monitor_base;87358736size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);87378738return size;8739}87408741static u32 gaudi_gen_wait_cb(struct hl_device *hdev,8742struct hl_gen_wait_properties *prop)8743{8744struct hl_cb *cb = (struct hl_cb *) prop->data;8745void *buf = cb->kernel_address;8746u64 fence_addr = 0;8747u32 size = prop->size;87488749if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {8750dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",8751prop->q_idx);8752return 0;8753}87548755size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);8756size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,8757prop->sob_mask, prop->sob_val, prop->mon_id);8758size += gaudi_add_fence_pkt(buf + size);87598760return size;8761}87628763static void gaudi_reset_sob(struct hl_device *hdev, void *data)8764{8765struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;87668767dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,8768hw_sob->sob_id);87698770WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +8771hw_sob->sob_id * 4, 0);87728773kref_init(&hw_sob->kref);8774}87758776static u64 gaudi_get_device_time(struct hl_device *hdev)8777{8778u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;87798780return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);8781}87828783static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,8784u32 *block_size, u32 *block_id)8785{8786return -EPERM;8787}87888789static int gaudi_block_mmap(struct hl_device *hdev,8790struct vm_area_struct *vma,8791u32 block_id, u32 block_size)8792{8793return -EPERM;8794}87958796static void gaudi_enable_events_from_fw(struct hl_device *hdev)8797{8798struct cpu_dyn_regs *dyn_regs =8799&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;8800u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?8801mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :8802le32_to_cpu(dyn_regs->gic_host_ints_irq);88038804WREG32(irq_handler_offset,8805gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);8806}88078808static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)8809{8810return -EINVAL;8811}88128813static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)8814{8815switch (pll_idx) {8816case HL_GAUDI_CPU_PLL: return CPU_PLL;8817case HL_GAUDI_PCI_PLL: return PCI_PLL;8818case HL_GAUDI_NIC_PLL: return NIC_PLL;8819case HL_GAUDI_DMA_PLL: return DMA_PLL;8820case HL_GAUDI_MESH_PLL: return MESH_PLL;8821case HL_GAUDI_MME_PLL: return MME_PLL;8822case HL_GAUDI_TPC_PLL: return TPC_PLL;8823case HL_GAUDI_IF_PLL: return IF_PLL;8824case HL_GAUDI_SRAM_PLL: return SRAM_PLL;8825case HL_GAUDI_HBM_PLL: return HBM_PLL;8826default: return -EINVAL;8827}8828}88298830static int gaudi_add_sync_to_engine_map_entry(8831struct hl_sync_to_engine_map *map, u32 reg_value,8832enum hl_sync_engine_type engine_type, u32 engine_id)8833{8834struct hl_sync_to_engine_map_entry *entry;88358836/* Reg value represents a partial address of sync object,8837* it is used as unique identifier. For this we need to8838* clear the cutoff cfg base bits from the value.8839*/8840if (reg_value == 0 || reg_value == 0xffffffff)8841return 0;8842reg_value -= lower_32_bits(CFG_BASE);88438844/* create a new hash entry */8845entry = kzalloc(sizeof(*entry), GFP_KERNEL);8846if (!entry)8847return -ENOMEM;8848entry->engine_type = engine_type;8849entry->engine_id = engine_id;8850entry->sync_id = reg_value;8851hash_add(map->tb, &entry->node, reg_value);88528853return 0;8854}88558856static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,8857struct hl_sync_to_engine_map *map)8858{8859struct hl_state_dump_specs *sds = &hdev->state_dump_specs;8860int i, j, rc;8861u32 reg_value;88628863/* Iterate over TPC engines */8864for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {88658866reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +8867sds->props[SP_NEXT_TPC] * i);88688869rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,8870ENGINE_TPC, i);8871if (rc)8872goto free_sync_to_engine_map;8873}88748875/* Iterate over MME engines */8876for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {8877for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {88788879reg_value = RREG32(sds->props[SP_MME_CFG_SO] +8880sds->props[SP_NEXT_MME] * i +8881j * sizeof(u32));88828883rc = gaudi_add_sync_to_engine_map_entry(8884map, reg_value, ENGINE_MME,8885i * sds->props[SP_SUB_MME_ENG_NUM] + j);8886if (rc)8887goto free_sync_to_engine_map;8888}8889}88908891/* Iterate over DMA engines */8892for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {8893reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +8894sds->props[SP_DMA_QUEUES_OFFSET] * i);8895rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,8896ENGINE_DMA, i);8897if (rc)8898goto free_sync_to_engine_map;8899}89008901return 0;89028903free_sync_to_engine_map:8904hl_state_dump_free_sync_to_engine_map(map);89058906return rc;8907}89088909static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)8910{8911return FIELD_GET(8912SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,8913mon->status);8914}89158916static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)8917{8918const size_t max_write = 10;8919u32 gid, mask, sob;8920int i, offset;89218922/* Sync object ID is calculated as follows:8923* (8 * group_id + cleared bits in mask)8924*/8925gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,8926mon->arm_data);8927mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,8928mon->arm_data);89298930for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -8931max_write; mask >>= 1, i++) {8932if (!(mask & 1)) {8933sob = gid * MONITOR_MAX_SOBS + i;89348935if (offset > 0)8936offset += snprintf(sobs + offset, max_write,8937", ");89388939offset += snprintf(sobs + offset, max_write, "%u", sob);8940}8941}8942}89438944static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,8945struct hl_device *hdev,8946struct hl_mon_state_dump *mon)8947{8948const char *name;8949char scratch_buf1[BIN_REG_STRING_SIZE],8950scratch_buf2[BIN_REG_STRING_SIZE];8951char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};89528953name = hl_state_dump_get_monitor_name(hdev, mon);8954if (!name)8955name = "";89568957gaudi_fill_sobs_from_mon(monitored_sobs, mon);89588959return hl_snprintf_resize(8960buf, size, offset,8961"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",8962mon->id, name,8963FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,8964mon->arm_data),8965hl_format_as_binary(8966scratch_buf1, sizeof(scratch_buf1),8967FIELD_GET(8968SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,8969mon->arm_data)),8970FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,8971mon->arm_data),8972mon->wr_data,8973(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,8974hl_format_as_binary(8975scratch_buf2, sizeof(scratch_buf2),8976FIELD_GET(8977SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,8978mon->status)),8979monitored_sobs);8980}898189828983static int gaudi_print_fences_single_engine(8984struct hl_device *hdev, u64 base_offset, u64 status_base_offset,8985enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,8986size_t *size, size_t *offset)8987{8988struct hl_state_dump_specs *sds = &hdev->state_dump_specs;8989int rc = -ENOMEM, i;8990u32 *statuses, *fences;89918992statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],8993sizeof(*statuses), GFP_KERNEL);8994if (!statuses)8995goto out;89968997fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *8998sds->props[SP_ENGINE_NUM_OF_QUEUES],8999sizeof(*fences), GFP_KERNEL);9000if (!fences)9001goto free_status;90029003for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)9004statuses[i] = RREG32(status_base_offset + i * sizeof(u32));90059006for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *9007sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)9008fences[i] = RREG32(base_offset + i * sizeof(u32));90099010/* The actual print */9011for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {9012u32 fence_id;9013u64 fence_cnt, fence_rdata;9014const char *engine_name;90159016if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,9017statuses[i]))9018continue;90199020fence_id =9021FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);9022fence_cnt = base_offset + CFG_BASE +9023sizeof(u32) *9024(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);9025fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +9026sds->props[SP_FENCE0_RDATA_OFFSET];9027engine_name = hl_sync_engine_to_string(engine_type);90289029rc = hl_snprintf_resize(9030buf, size, offset,9031"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",9032engine_name, engine_id,9033i, fence_id,9034fence_cnt, engine_name, engine_id, fence_id, i,9035fence_rdata, engine_name, engine_id, fence_id, i,9036fences[fence_id],9037statuses[i]);9038if (rc)9039goto free_fences;9040}90419042rc = 0;90439044free_fences:9045kfree(fences);9046free_status:9047kfree(statuses);9048out:9049return rc;9050}905190529053static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {9054.monitor_valid = gaudi_monitor_valid,9055.print_single_monitor = gaudi_print_single_monitor,9056.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,9057.print_fences_single_engine = gaudi_print_fences_single_engine,9058};90599060static void gaudi_state_dump_init(struct hl_device *hdev)9061{9062struct hl_state_dump_specs *sds = &hdev->state_dump_specs;9063int i;90649065for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)9066hash_add(sds->so_id_to_str_tb,9067&gaudi_so_id_to_str[i].node,9068gaudi_so_id_to_str[i].id);90699070for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)9071hash_add(sds->monitor_id_to_str_tb,9072&gaudi_monitor_id_to_str[i].node,9073gaudi_monitor_id_to_str[i].id);90749075sds->props = gaudi_state_dump_specs_props;90769077sds->sync_namager_names = gaudi_sync_manager_names;90789079sds->funcs = gaudi_state_dump_funcs;9080}90819082static u32 *gaudi_get_stream_master_qid_arr(void)9083{9084return gaudi_stream_master;9085}90869087static int gaudi_set_dram_properties(struct hl_device *hdev)9088{9089return 0;9090}90919092static int gaudi_set_binning_masks(struct hl_device *hdev)9093{9094return 0;9095}90969097static void gaudi_check_if_razwi_happened(struct hl_device *hdev)9098{9099}91009101static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)9102{9103struct hl_device *hdev = dev_get_drvdata(dev);9104struct cpucp_info *cpucp_info;91059106cpucp_info = &hdev->asic_prop.cpucp_info;91079108return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));9109}91109111static DEVICE_ATTR_RO(infineon_ver);91129113static struct attribute *gaudi_vrm_dev_attrs[] = {9114&dev_attr_infineon_ver.attr,9115NULL,9116};91179118static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,9119struct attribute_group *dev_vrm_attr_grp)9120{9121hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);9122dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;9123}91249125static int gaudi_send_device_activity(struct hl_device *hdev, bool open)9126{9127return 0;9128}91299130static const struct hl_asic_funcs gaudi_funcs = {9131.early_init = gaudi_early_init,9132.early_fini = gaudi_early_fini,9133.late_init = gaudi_late_init,9134.late_fini = gaudi_late_fini,9135.sw_init = gaudi_sw_init,9136.sw_fini = gaudi_sw_fini,9137.hw_init = gaudi_hw_init,9138.hw_fini = gaudi_hw_fini,9139.halt_engines = gaudi_halt_engines,9140.suspend = gaudi_suspend,9141.resume = gaudi_resume,9142.mmap = gaudi_mmap,9143.ring_doorbell = gaudi_ring_doorbell,9144.pqe_write = gaudi_pqe_write,9145.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,9146.asic_dma_free_coherent = gaudi_dma_free_coherent,9147.scrub_device_mem = gaudi_scrub_device_mem,9148.scrub_device_dram = gaudi_scrub_device_dram,9149.get_int_queue_base = gaudi_get_int_queue_base,9150.test_queues = gaudi_test_queues,9151.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,9152.asic_dma_pool_free = gaudi_dma_pool_free,9153.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,9154.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,9155.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,9156.cs_parser = gaudi_cs_parser,9157.dma_map_sgtable = hl_asic_dma_map_sgtable,9158.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,9159.update_eq_ci = gaudi_update_eq_ci,9160.context_switch = gaudi_context_switch,9161.restore_phase_topology = gaudi_restore_phase_topology,9162.debugfs_read_dma = gaudi_debugfs_read_dma,9163.add_device_attr = gaudi_add_device_attr,9164.handle_eqe = gaudi_handle_eqe,9165.get_events_stat = gaudi_get_events_stat,9166.read_pte = gaudi_read_pte,9167.write_pte = gaudi_write_pte,9168.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,9169.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,9170.mmu_prefetch_cache_range = NULL,9171.send_heartbeat = gaudi_send_heartbeat,9172.debug_coresight = gaudi_debug_coresight,9173.is_device_idle = gaudi_is_device_idle,9174.compute_reset_late_init = gaudi_compute_reset_late_init,9175.hw_queues_lock = gaudi_hw_queues_lock,9176.hw_queues_unlock = gaudi_hw_queues_unlock,9177.get_pci_id = gaudi_get_pci_id,9178.get_eeprom_data = gaudi_get_eeprom_data,9179.get_monitor_dump = gaudi_get_monitor_dump,9180.send_cpu_message = gaudi_send_cpu_message,9181.pci_bars_map = gaudi_pci_bars_map,9182.init_iatu = gaudi_init_iatu,9183.rreg = hl_rreg,9184.wreg = hl_wreg,9185.halt_coresight = gaudi_halt_coresight,9186.ctx_init = gaudi_ctx_init,9187.ctx_fini = gaudi_ctx_fini,9188.pre_schedule_cs = gaudi_pre_schedule_cs,9189.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,9190.load_firmware_to_device = gaudi_load_firmware_to_device,9191.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,9192.get_signal_cb_size = gaudi_get_signal_cb_size,9193.get_wait_cb_size = gaudi_get_wait_cb_size,9194.gen_signal_cb = gaudi_gen_signal_cb,9195.gen_wait_cb = gaudi_gen_wait_cb,9196.reset_sob = gaudi_reset_sob,9197.reset_sob_group = gaudi_reset_sob_group,9198.get_device_time = gaudi_get_device_time,9199.pb_print_security_errors = NULL,9200.collective_wait_init_cs = gaudi_collective_wait_init_cs,9201.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,9202.get_dec_base_addr = NULL,9203.scramble_addr = hl_mmu_scramble_addr,9204.descramble_addr = hl_mmu_descramble_addr,9205.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,9206.get_hw_block_id = gaudi_get_hw_block_id,9207.hw_block_mmap = gaudi_block_mmap,9208.enable_events_from_fw = gaudi_enable_events_from_fw,9209.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,9210.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,9211.init_firmware_preload_params = gaudi_init_firmware_preload_params,9212.init_firmware_loader = gaudi_init_firmware_loader,9213.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,9214.state_dump_init = gaudi_state_dump_init,9215.get_sob_addr = gaudi_get_sob_addr,9216.set_pci_memory_regions = gaudi_set_pci_memory_regions,9217.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,9218.check_if_razwi_happened = gaudi_check_if_razwi_happened,9219.mmu_get_real_page_size = hl_mmu_get_real_page_size,9220.access_dev_mem = hl_access_dev_mem,9221.set_dram_bar_base = gaudi_set_hbm_bar_base,9222.send_device_activity = gaudi_send_device_activity,9223.set_dram_properties = gaudi_set_dram_properties,9224.set_binning_masks = gaudi_set_binning_masks,9225};92269227/**9228* gaudi_set_asic_funcs - set GAUDI function pointers9229*9230* @hdev: pointer to hl_device structure9231*9232*/9233void gaudi_set_asic_funcs(struct hl_device *hdev)9234{9235hdev->asic_funcs = &gaudi_funcs;9236}923792389239