Path: blob/master/drivers/accel/habanalabs/common/debugfs.c
29281 views
// SPDX-License-Identifier: GPL-2.012/*3* Copyright 2016-2021 HabanaLabs, Ltd.4* All Rights Reserved.5*/67#include "habanalabs.h"8#include "hldio.h"9#include "../include/hw_ip/mmu/mmu_general.h"1011#include <linux/pci.h>12#include <linux/uaccess.h>13#include <linux/vmalloc.h>14#include <linux/iommu.h>1516#define MMU_ADDR_BUF_SIZE 4017#define MMU_ASID_BUF_SIZE 1018#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)19#define I2C_MAX_TRANSACTION_LEN 82021static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,22u8 i2c_reg, u8 i2c_len, u64 *val)23{24struct cpucp_packet pkt;25int rc;2627if (!hl_device_operational(hdev, NULL))28return -EBUSY;2930if (i2c_len > I2C_MAX_TRANSACTION_LEN) {31dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",32i2c_len, I2C_MAX_TRANSACTION_LEN);33return -EINVAL;34}3536memset(&pkt, 0, sizeof(pkt));3738pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<39CPUCP_PKT_CTL_OPCODE_SHIFT);40pkt.i2c_bus = i2c_bus;41pkt.i2c_addr = i2c_addr;42pkt.i2c_reg = i2c_reg;43pkt.i2c_len = i2c_len;4445rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val);46if (rc && rc != -EAGAIN)47dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);4849return rc;50}5152static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,53u8 i2c_reg, u8 i2c_len, u64 val)54{55struct cpucp_packet pkt;56int rc;5758if (!hl_device_operational(hdev, NULL))59return -EBUSY;6061if (i2c_len > I2C_MAX_TRANSACTION_LEN) {62dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",63i2c_len, I2C_MAX_TRANSACTION_LEN);64return -EINVAL;65}6667memset(&pkt, 0, sizeof(pkt));6869pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<70CPUCP_PKT_CTL_OPCODE_SHIFT);71pkt.i2c_bus = i2c_bus;72pkt.i2c_addr = i2c_addr;73pkt.i2c_reg = i2c_reg;74pkt.i2c_len = i2c_len;75pkt.value = cpu_to_le64(val);7677rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);78if (rc && rc != -EAGAIN)79dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);8081return rc;82}8384static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)85{86struct cpucp_packet pkt;87int rc;8889if (!hl_device_operational(hdev, NULL))90return;9192memset(&pkt, 0, sizeof(pkt));9394pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<95CPUCP_PKT_CTL_OPCODE_SHIFT);96pkt.led_index = cpu_to_le32(led);97pkt.value = cpu_to_le64(state);9899rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);100if (rc && rc != -EAGAIN)101dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);102}103104static int command_buffers_show(struct seq_file *s, void *data)105{106struct hl_debugfs_entry *entry = s->private;107struct hl_dbg_device_entry *dev_entry = entry->dev_entry;108struct hl_cb *cb;109bool first = true;110111spin_lock(&dev_entry->cb_spinlock);112113list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {114if (first) {115first = false;116seq_puts(s, "\n");117seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? CS counter\n");118seq_puts(s, "---------------------------------------------------------------\n");119}120seq_printf(s,121" %03llu %d 0x%08x %d %d %d\n",122cb->buf->handle, cb->ctx->asid, cb->size,123kref_read(&cb->buf->refcount),124atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt));125}126127spin_unlock(&dev_entry->cb_spinlock);128129if (!first)130seq_puts(s, "\n");131132return 0;133}134135static int command_submission_show(struct seq_file *s, void *data)136{137struct hl_debugfs_entry *entry = s->private;138struct hl_dbg_device_entry *dev_entry = entry->dev_entry;139struct hl_cs *cs;140bool first = true;141142spin_lock(&dev_entry->cs_spinlock);143144list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {145if (first) {146first = false;147seq_puts(s, "\n");148seq_puts(s, " CS ID CS TYPE CTX ASID CS RefCnt Submitted Completed\n");149seq_puts(s, "----------------------------------------------------------------\n");150}151seq_printf(s,152" %llu %d %d %d %d %d\n",153cs->sequence, cs->type, cs->ctx->asid,154kref_read(&cs->refcount),155cs->submitted, cs->completed);156}157158spin_unlock(&dev_entry->cs_spinlock);159160if (!first)161seq_puts(s, "\n");162163return 0;164}165166static int command_submission_jobs_show(struct seq_file *s, void *data)167{168struct hl_debugfs_entry *entry = s->private;169struct hl_dbg_device_entry *dev_entry = entry->dev_entry;170struct hl_cs_job *job;171bool first = true;172173spin_lock(&dev_entry->cs_job_spinlock);174175list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {176if (first) {177first = false;178seq_puts(s, "\n");179seq_puts(s, " JOB ID CS ID CS TYPE CTX ASID JOB RefCnt H/W Queue\n");180seq_puts(s, "---------------------------------------------------------------\n");181}182if (job->cs)183seq_printf(s,184" %02d %llu %d %d %d %d\n",185job->id, job->cs->sequence, job->cs->type,186job->cs->ctx->asid, kref_read(&job->refcount),187job->hw_queue_id);188else189seq_printf(s,190" %02d 0 0 %d %d %d\n",191job->id, HL_KERNEL_ASID_ID,192kref_read(&job->refcount), job->hw_queue_id);193}194195spin_unlock(&dev_entry->cs_job_spinlock);196197if (!first)198seq_puts(s, "\n");199200return 0;201}202203static int userptr_show(struct seq_file *s, void *data)204{205struct hl_debugfs_entry *entry = s->private;206struct hl_dbg_device_entry *dev_entry = entry->dev_entry;207struct hl_userptr *userptr;208char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",209"DMA_FROM_DEVICE", "DMA_NONE"};210bool first = true;211212spin_lock(&dev_entry->userptr_spinlock);213214list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {215if (first) {216first = false;217seq_puts(s, "\n");218seq_puts(s, " pid user virtual address size dma dir\n");219seq_puts(s, "----------------------------------------------------------\n");220}221seq_printf(s, " %-7d 0x%-14llx %-10llu %-30s\n",222userptr->pid, userptr->addr, userptr->size,223dma_dir[userptr->dir]);224}225226spin_unlock(&dev_entry->userptr_spinlock);227228if (!first)229seq_puts(s, "\n");230231return 0;232}233234static int vm_show(struct seq_file *s, void *data)235{236struct hl_debugfs_entry *entry = s->private;237struct hl_dbg_device_entry *dev_entry = entry->dev_entry;238struct hl_vm_hw_block_list_node *lnode;239struct hl_ctx *ctx;240struct hl_vm *vm;241struct hl_vm_hash_node *hnode;242struct hl_userptr *userptr;243struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;244struct hl_va_range *va_range;245struct hl_vm_va_block *va_block;246enum vm_type *vm_type;247bool once = true;248u64 j;249int i;250251mutex_lock(&dev_entry->ctx_mem_hash_mutex);252253list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {254once = false;255seq_puts(s, "\n\n----------------------------------------------------");256seq_puts(s, "\n----------------------------------------------------\n\n");257seq_printf(s, "ctx asid: %u\n", ctx->asid);258259seq_puts(s, "\nmappings:\n\n");260seq_puts(s, " virtual address size handle\n");261seq_puts(s, "----------------------------------------------------\n");262mutex_lock(&ctx->mem_hash_lock);263hash_for_each(ctx->mem_hash, i, hnode, node) {264vm_type = hnode->ptr;265266if (*vm_type == VM_TYPE_USERPTR) {267userptr = hnode->ptr;268seq_printf(s,269" 0x%-14llx %-10llu\n",270hnode->vaddr, userptr->size);271} else {272phys_pg_pack = hnode->ptr;273seq_printf(s,274" 0x%-14llx %-10llu %-4u\n",275hnode->vaddr, phys_pg_pack->total_size,276phys_pg_pack->handle);277}278}279mutex_unlock(&ctx->mem_hash_lock);280281if (ctx->asid != HL_KERNEL_ASID_ID &&282!list_empty(&ctx->hw_block_mem_list)) {283seq_puts(s, "\nhw_block mappings:\n\n");284seq_puts(s,285" virtual address block size mapped size HW block id\n");286seq_puts(s,287"---------------------------------------------------------------\n");288mutex_lock(&ctx->hw_block_list_lock);289list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {290seq_printf(s,291" 0x%-14lx %-6u %-6u %-9u\n",292lnode->vaddr, lnode->block_size, lnode->mapped_size,293lnode->id);294}295mutex_unlock(&ctx->hw_block_list_lock);296}297298vm = &ctx->hdev->vm;299spin_lock(&vm->idr_lock);300301if (!idr_is_empty(&vm->phys_pg_pack_handles))302seq_puts(s, "\n\nallocations:\n");303304idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {305if (phys_pg_pack->asid != ctx->asid)306continue;307308seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);309seq_printf(s, "page size: %u\n\n",310phys_pg_pack->page_size);311seq_puts(s, " physical address\n");312seq_puts(s, "---------------------\n");313for (j = 0 ; j < phys_pg_pack->npages ; j++) {314seq_printf(s, " 0x%-14llx\n",315phys_pg_pack->pages[j]);316}317}318spin_unlock(&vm->idr_lock);319320}321322mutex_unlock(&dev_entry->ctx_mem_hash_mutex);323324ctx = hl_get_compute_ctx(dev_entry->hdev);325if (ctx) {326seq_puts(s, "\nVA ranges:\n\n");327for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) {328va_range = ctx->va_range[i];329seq_printf(s, " va_range %d\n", i);330seq_puts(s, "---------------------\n");331mutex_lock(&va_range->lock);332list_for_each_entry(va_block, &va_range->list, node) {333seq_printf(s, "%#16llx - %#16llx (%#llx)\n",334va_block->start, va_block->end,335va_block->size);336}337mutex_unlock(&va_range->lock);338seq_puts(s, "\n");339}340hl_ctx_put(ctx);341}342343if (!once)344seq_puts(s, "\n");345346return 0;347}348349static int userptr_lookup_show(struct seq_file *s, void *data)350{351struct hl_debugfs_entry *entry = s->private;352struct hl_dbg_device_entry *dev_entry = entry->dev_entry;353struct scatterlist *sg;354struct hl_userptr *userptr;355bool first = true;356u64 total_npages, npages, sg_start, sg_end;357dma_addr_t dma_addr;358int i;359360spin_lock(&dev_entry->userptr_spinlock);361362list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {363if (dev_entry->userptr_lookup >= userptr->addr &&364dev_entry->userptr_lookup < userptr->addr + userptr->size) {365total_npages = 0;366for_each_sgtable_dma_sg(userptr->sgt, sg, i) {367npages = hl_get_sg_info(sg, &dma_addr);368sg_start = userptr->addr +369total_npages * PAGE_SIZE;370sg_end = userptr->addr +371(total_npages + npages) * PAGE_SIZE;372373if (dev_entry->userptr_lookup >= sg_start &&374dev_entry->userptr_lookup < sg_end) {375dma_addr += (dev_entry->userptr_lookup -376sg_start);377if (first) {378first = false;379seq_puts(s, "\n");380seq_puts(s, " user virtual address dma address pid region start region size\n");381seq_puts(s, "---------------------------------------------------------------------------------------\n");382}383seq_printf(s, " 0x%-18llx 0x%-16llx %-8u 0x%-16llx %-12llu\n",384dev_entry->userptr_lookup,385(u64)dma_addr, userptr->pid,386userptr->addr, userptr->size);387}388total_npages += npages;389}390}391}392393spin_unlock(&dev_entry->userptr_spinlock);394395if (!first)396seq_puts(s, "\n");397398return 0;399}400401static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,402size_t count, loff_t *f_pos)403{404struct seq_file *s = file->private_data;405struct hl_debugfs_entry *entry = s->private;406struct hl_dbg_device_entry *dev_entry = entry->dev_entry;407ssize_t rc;408u64 value;409410rc = kstrtoull_from_user(buf, count, 16, &value);411if (rc)412return rc;413414dev_entry->userptr_lookup = value;415416return count;417}418419static int mmu_show(struct seq_file *s, void *data)420{421struct hl_debugfs_entry *entry = s->private;422struct hl_dbg_device_entry *dev_entry = entry->dev_entry;423struct hl_device *hdev = dev_entry->hdev;424struct hl_ctx *ctx;425struct hl_mmu_hop_info hops_info = {0};426u64 virt_addr = dev_entry->mmu_addr, phys_addr;427int i;428429if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)430ctx = hdev->kernel_ctx;431else432ctx = hl_get_compute_ctx(hdev);433434if (!ctx) {435dev_err(hdev->dev, "no ctx available\n");436return 0;437}438439if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) {440dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",441virt_addr);442goto put_ctx;443}444445hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);446447if (hops_info.scrambled_vaddr &&448(dev_entry->mmu_addr != hops_info.scrambled_vaddr))449seq_printf(s,450"asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",451dev_entry->mmu_asid, dev_entry->mmu_addr,452hops_info.scrambled_vaddr,453hops_info.unscrambled_paddr, phys_addr);454else455seq_printf(s,456"asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",457dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);458459for (i = 0 ; i < hops_info.used_hops ; i++) {460seq_printf(s, "hop%d_addr: 0x%llx\n",461i, hops_info.hop_info[i].hop_addr);462seq_printf(s, "hop%d_pte_addr: 0x%llx\n",463i, hops_info.hop_info[i].hop_pte_addr);464seq_printf(s, "hop%d_pte: 0x%llx\n",465i, hops_info.hop_info[i].hop_pte_val);466}467468put_ctx:469if (dev_entry->mmu_asid != HL_KERNEL_ASID_ID)470hl_ctx_put(ctx);471472return 0;473}474475static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,476size_t count, loff_t *f_pos)477{478struct seq_file *s = file->private_data;479struct hl_debugfs_entry *entry = s->private;480struct hl_dbg_device_entry *dev_entry = entry->dev_entry;481struct hl_device *hdev = dev_entry->hdev;482char kbuf[MMU_KBUF_SIZE] = {0};483char *c;484ssize_t rc;485486if (count > sizeof(kbuf) - 1)487goto err;488if (copy_from_user(kbuf, buf, count))489goto err;490kbuf[count] = 0;491492c = strchr(kbuf, ' ');493if (!c)494goto err;495*c = '\0';496497rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);498if (rc)499goto err;500501if (strncmp(c+1, "0x", 2))502goto err;503rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);504if (rc)505goto err;506507return count;508509err:510dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");511512return -EINVAL;513}514515static int mmu_ack_error(struct seq_file *s, void *data)516{517struct hl_debugfs_entry *entry = s->private;518struct hl_dbg_device_entry *dev_entry = entry->dev_entry;519struct hl_device *hdev = dev_entry->hdev;520int rc;521522if (!dev_entry->mmu_cap_mask) {523dev_err(hdev->dev, "mmu_cap_mask is not set\n");524goto err;525}526527rc = hdev->asic_funcs->ack_mmu_errors(hdev, dev_entry->mmu_cap_mask);528if (rc)529goto err;530531return 0;532err:533return -EINVAL;534}535536static ssize_t mmu_ack_error_value_write(struct file *file,537const char __user *buf,538size_t count, loff_t *f_pos)539{540struct seq_file *s = file->private_data;541struct hl_debugfs_entry *entry = s->private;542struct hl_dbg_device_entry *dev_entry = entry->dev_entry;543struct hl_device *hdev = dev_entry->hdev;544char kbuf[MMU_KBUF_SIZE] = {0};545ssize_t rc;546547if (count > sizeof(kbuf) - 1)548goto err;549550if (copy_from_user(kbuf, buf, count))551goto err;552553kbuf[count] = 0;554555if (strncmp(kbuf, "0x", 2))556goto err;557558rc = kstrtoull(kbuf, 16, &dev_entry->mmu_cap_mask);559if (rc)560goto err;561562return count;563err:564dev_err(hdev->dev, "usage: echo <0xmmu_cap_mask > > mmu_error\n");565566return -EINVAL;567}568569static int engines_show(struct seq_file *s, void *data)570{571struct hl_debugfs_entry *entry = s->private;572struct hl_dbg_device_entry *dev_entry = entry->dev_entry;573struct hl_device *hdev = dev_entry->hdev;574struct engines_data eng_data;575576if (hdev->reset_info.in_reset) {577dev_warn_ratelimited(hdev->dev,578"Can't check device idle during reset\n");579return 0;580}581582eng_data.actual_size = 0;583eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;584eng_data.buf = vmalloc(eng_data.allocated_buf_size);585if (!eng_data.buf)586return -ENOMEM;587588hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);589590if (eng_data.actual_size > eng_data.allocated_buf_size) {591dev_err(hdev->dev,592"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",593eng_data.actual_size, eng_data.allocated_buf_size);594vfree(eng_data.buf);595return -ENOMEM;596}597598seq_write(s, eng_data.buf, eng_data.actual_size);599600vfree(eng_data.buf);601602return 0;603}604605#ifdef CONFIG_HL_HLDIO606/* DIO debugfs functions following the standard pattern */607static int dio_ssd2hl_show(struct seq_file *s, void *data)608{609struct hl_debugfs_entry *entry = s->private;610struct hl_dbg_device_entry *dev_entry = entry->dev_entry;611struct hl_device *hdev = dev_entry->hdev;612613if (!hdev->asic_prop.supports_nvme) {614seq_puts(s, "NVMe Direct I/O not supported\\n");615return 0;616}617618seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n");619seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read);620seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n");621622return 0;623}624625static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf,626size_t count, loff_t *f_pos)627{628struct seq_file *s = file->private_data;629struct hl_debugfs_entry *entry = s->private;630struct hl_dbg_device_entry *dev_entry = entry->dev_entry;631struct hl_device *hdev = dev_entry->hdev;632struct hl_ctx *ctx = hdev->kernel_ctx;633char kbuf[128];634u64 device_va = 0, off_bytes = 0, len_bytes = 0;635u32 fd = 0;636size_t len_read = 0;637int rc, parsed;638639if (!hdev->asic_prop.supports_nvme)640return -EOPNOTSUPP;641642if (count >= sizeof(kbuf))643return -EINVAL;644645if (copy_from_user(kbuf, buf, count))646return -EFAULT;647648kbuf[count] = 0;649650/* Parse: fd=N va=0xADDR off=N len=N */651parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu",652&fd, &device_va, &off_bytes, &len_bytes);653if (parsed != 4) {654dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\\n");655return -EINVAL;656}657658/* Validate file descriptor */659if (fd == 0) {660dev_err(hdev->dev, "Invalid file descriptor: %u\\n", fd);661return -EINVAL;662}663664/* Validate alignment requirements */665if (!IS_ALIGNED(device_va, PAGE_SIZE) ||666!IS_ALIGNED(off_bytes, PAGE_SIZE) ||667!IS_ALIGNED(len_bytes, PAGE_SIZE)) {668dev_err(hdev->dev,669"All parameters must be page-aligned (4KB)\\n");670return -EINVAL;671}672673/* Validate transfer size */674if (len_bytes == 0 || len_bytes > SZ_1G) {675dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\\n",676len_bytes);677return -EINVAL;678}679680dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\\n",681fd, device_va, off_bytes, len_bytes);682683rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read);684if (rc < 0) {685dev_entry->dio_stats.failed_ops++;686dev_err(hdev->dev, "SSD2HL operation failed: %d\\n", rc);687return rc;688}689690/* Update statistics */691dev_entry->dio_stats.total_ops++;692dev_entry->dio_stats.successful_ops++;693dev_entry->dio_stats.bytes_transferred += len_read;694dev_entry->dio_stats.last_len_read = len_read;695696dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\\n", len_read);697698return count;699}700701static int dio_hl2ssd_show(struct seq_file *s, void *data)702{703seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n");704return 0;705}706707static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf,708size_t count, loff_t *f_pos)709{710struct seq_file *s = file->private_data;711struct hl_debugfs_entry *entry = s->private;712struct hl_dbg_device_entry *dev_entry = entry->dev_entry;713struct hl_device *hdev = dev_entry->hdev;714715if (!hdev->asic_prop.supports_nvme)716return -EOPNOTSUPP;717718dev_dbg(hdev->dev, "HL2SSD operation not implemented\\n");719return -EOPNOTSUPP;720}721722static int dio_stats_show(struct seq_file *s, void *data)723{724struct hl_debugfs_entry *entry = s->private;725struct hl_dbg_device_entry *dev_entry = entry->dev_entry;726struct hl_device *hdev = dev_entry->hdev;727struct hl_dio_stats *stats = &dev_entry->dio_stats;728u64 avg_bytes_per_op = 0, success_rate = 0;729730if (!hdev->asic_prop.supports_nvme) {731seq_puts(s, "NVMe Direct I/O not supported\\n");732return 0;733}734735if (stats->successful_ops > 0)736avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops;737738if (stats->total_ops > 0)739success_rate = (stats->successful_ops * 100) / stats->total_ops;740741seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n");742seq_printf(s, "Total operations: %llu\\n", stats->total_ops);743seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops);744seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops);745seq_printf(s, "Success rate: %llu%%\\n", success_rate);746seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred);747seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op);748seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read);749750return 0;751}752753static int dio_reset_show(struct seq_file *s, void *data)754{755seq_puts(s, "Write '1' to reset DIO statistics\\n");756return 0;757}758759static ssize_t dio_reset_write(struct file *file, const char __user *buf,760size_t count, loff_t *f_pos)761{762struct seq_file *s = file->private_data;763struct hl_debugfs_entry *entry = s->private;764struct hl_dbg_device_entry *dev_entry = entry->dev_entry;765struct hl_device *hdev = dev_entry->hdev;766char kbuf[8];767unsigned long val;768int rc;769770if (!hdev->asic_prop.supports_nvme)771return -EOPNOTSUPP;772773if (count >= sizeof(kbuf))774return -EINVAL;775776if (copy_from_user(kbuf, buf, count))777return -EFAULT;778779kbuf[count] = 0;780781rc = kstrtoul(kbuf, 0, &val);782if (rc)783return rc;784785if (val == 1) {786memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));787dev_dbg(hdev->dev, "DIO statistics reset\\n");788} else {789dev_err(hdev->dev, "Write '1' to reset statistics\\n");790return -EINVAL;791}792793return count;794}795#endif796797static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,798size_t count, loff_t *ppos)799{800struct hl_dbg_device_entry *entry = file_inode(f)->i_private;801struct hl_device *hdev = entry->hdev;802u64 val = hdev->memory_scrub_val;803int rc;804805if (!hl_device_operational(hdev, NULL)) {806dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n");807return -EIO;808}809810mutex_lock(&hdev->fpriv_list_lock);811if (hdev->is_compute_ctx_active) {812mutex_unlock(&hdev->fpriv_list_lock);813dev_err(hdev->dev, "can't scrub dram, context exist\n");814return -EBUSY;815}816hdev->is_in_dram_scrub = true;817mutex_unlock(&hdev->fpriv_list_lock);818819rc = hdev->asic_funcs->scrub_device_dram(hdev, val);820821mutex_lock(&hdev->fpriv_list_lock);822hdev->is_in_dram_scrub = false;823mutex_unlock(&hdev->fpriv_list_lock);824825if (rc)826return rc;827return count;828}829830static bool hl_is_device_va(struct hl_device *hdev, u64 addr)831{832struct asic_fixed_properties *prop = &hdev->asic_prop;833834if (prop->dram_supports_virtual_memory &&835(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))836return true;837838if (addr >= prop->pmmu.start_addr &&839addr < prop->pmmu.end_addr)840return true;841842if (addr >= prop->pmmu_huge.start_addr &&843addr < prop->pmmu_huge.end_addr)844return true;845846return false;847}848849static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr,850u32 size)851{852struct asic_fixed_properties *prop = &hdev->asic_prop;853u64 dram_start_addr, dram_end_addr;854855if (prop->dram_supports_virtual_memory) {856dram_start_addr = prop->dmmu.start_addr;857dram_end_addr = prop->dmmu.end_addr;858} else {859dram_start_addr = prop->dram_base_address;860dram_end_addr = prop->dram_end_address;861}862863if (hl_mem_area_inside_range(addr, size, dram_start_addr,864dram_end_addr))865return true;866867if (hl_mem_area_inside_range(addr, size, prop->sram_base_address,868prop->sram_end_address))869return true;870871return false;872}873874static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,875u64 *phys_addr)876{877struct hl_vm_phys_pg_pack *phys_pg_pack;878struct hl_ctx *ctx;879struct hl_vm_hash_node *hnode;880u64 end_address, range_size;881struct hl_userptr *userptr;882enum vm_type *vm_type;883bool valid = false;884int i, rc = 0;885886ctx = hl_get_compute_ctx(hdev);887888if (!ctx) {889dev_err(hdev->dev, "no ctx available\n");890return -EINVAL;891}892893/* Verify address is mapped */894mutex_lock(&ctx->mem_hash_lock);895hash_for_each(ctx->mem_hash, i, hnode, node) {896vm_type = hnode->ptr;897898if (*vm_type == VM_TYPE_USERPTR) {899userptr = hnode->ptr;900range_size = userptr->size;901} else {902phys_pg_pack = hnode->ptr;903range_size = phys_pg_pack->total_size;904}905906end_address = virt_addr + size;907if ((virt_addr >= hnode->vaddr) &&908(end_address <= hnode->vaddr + range_size)) {909valid = true;910break;911}912}913mutex_unlock(&ctx->mem_hash_lock);914915if (!valid) {916dev_err(hdev->dev,917"virt addr 0x%llx is not mapped\n",918virt_addr);919rc = -EINVAL;920goto put_ctx;921}922923rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);924if (rc) {925dev_err(hdev->dev,926"virt addr 0x%llx is not mapped to phys addr\n",927virt_addr);928rc = -EINVAL;929}930931put_ctx:932hl_ctx_put(ctx);933934return rc;935}936937static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr,938u64 *val, enum debugfs_access_type acc_type, bool *found)939{940size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?941sizeof(u64) : sizeof(u32);942struct pci_mem_region *mem_reg;943int i;944945for (i = 0; i < PCI_REGION_NUMBER; i++) {946mem_reg = &hdev->pci_mem_region[i];947if (!mem_reg->used)948continue;949if (addr >= mem_reg->region_base &&950addr <= mem_reg->region_base + mem_reg->region_size - acc_size) {951*found = true;952return hdev->asic_funcs->access_dev_mem(hdev, i, addr, val, acc_type);953}954}955return 0;956}957958static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,959enum debugfs_access_type acc_type)960{961struct asic_fixed_properties *prop = &hdev->asic_prop;962u64 offset = prop->device_dma_offset_for_host_access;963964switch (acc_type) {965case DEBUGFS_READ32:966*val = *(u32 *) phys_to_virt(addr - offset);967break;968case DEBUGFS_WRITE32:969*(u32 *) phys_to_virt(addr - offset) = *val;970break;971case DEBUGFS_READ64:972*val = *(u64 *) phys_to_virt(addr - offset);973break;974case DEBUGFS_WRITE64:975*(u64 *) phys_to_virt(addr - offset) = *val;976break;977default:978dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type);979break;980}981}982983static void dump_cfg_access_entry(struct hl_device *hdev,984struct hl_debugfs_cfg_access_entry *entry)985{986char *access_type = "";987struct tm tm;988989switch (entry->debugfs_type) {990case DEBUGFS_READ32:991access_type = "READ32 from";992break;993case DEBUGFS_WRITE32:994access_type = "WRITE32 to";995break;996case DEBUGFS_READ64:997access_type = "READ64 from";998break;999case DEBUGFS_WRITE64:1000access_type = "WRITE64 to";1001break;1002default:1003dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type);1004return;1005}10061007time64_to_tm(entry->seconds_since_epoch, 0, &tm);1008dev_info(hdev->dev,1009"%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1,1010tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr);1011}10121013void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)1014{1015struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses;1016u32 i, head, count = 0;1017time64_t entry_time, now;1018unsigned long flags;10191020now = ktime_get_real_seconds();10211022spin_lock_irqsave(&dbgfs->lock, flags);1023head = dbgfs->head;1024if (head == 0)1025i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;1026else1027i = head - 1;10281029/* Walk back until timeout or invalid entry */1030while (dbgfs->cfg_access_list[i].valid) {1031entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch;1032/* Stop when entry is older than timeout */1033if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC)1034break;10351036/* print single entry under lock */1037{1038struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i];1039/*1040* We copy the entry out under lock and then print after1041* releasing the lock to minimize time under lock.1042*/1043spin_unlock_irqrestore(&dbgfs->lock, flags);1044dump_cfg_access_entry(hdev, &entry);1045spin_lock_irqsave(&dbgfs->lock, flags);1046}10471048/* mark consumed */1049dbgfs->cfg_access_list[i].valid = false;10501051if (i == 0)1052i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;1053else1054i--;1055count++;1056if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN)1057break;1058}1059spin_unlock_irqrestore(&dbgfs->lock, flags);1060}10611062static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size,1063enum debugfs_access_type access_type)1064{1065struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses;1066struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG];1067struct hl_debugfs_cfg_access_entry *new_entry;1068unsigned long flags;10691070/* Check if address is in config memory */1071if (addr >= mem_reg->region_base &&1072mem_reg->region_size >= access_size &&1073addr <= mem_reg->region_base + mem_reg->region_size - access_size) {10741075spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags);10761077new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head];1078new_entry->seconds_since_epoch = ktime_get_real_seconds();1079new_entry->addr = addr;1080new_entry->debugfs_type = access_type;1081new_entry->valid = true;1082dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1)1083% HL_DBGFS_CFG_ACCESS_HIST_LEN;10841085spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags);10861087}1088}10891090static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,1091enum debugfs_access_type acc_type)1092{1093size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?1094sizeof(u64) : sizeof(u32);1095u64 host_start = hdev->asic_prop.host_base_address;1096u64 host_end = hdev->asic_prop.host_end_address;1097bool user_address, found = false;1098int rc;10991100user_address = hl_is_device_va(hdev, addr);1101if (user_address) {1102rc = device_va_to_pa(hdev, addr, acc_size, &addr);1103if (rc)1104return rc;1105}11061107check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type);1108rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);1109if (rc) {1110dev_err(hdev->dev,1111"Failed reading addr %#llx from dev mem (%d)\n",1112addr, rc);1113return rc;1114}11151116if (found)1117return 0;11181119if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) {1120rc = -EINVAL;1121goto err;1122}11231124if (addr >= host_start && addr <= host_end - acc_size) {1125hl_access_host_mem(hdev, addr, val, acc_type);1126} else {1127rc = -EINVAL;1128goto err;1129}11301131return 0;1132err:1133dev_err(hdev->dev, "invalid addr %#llx\n", addr);1134return rc;1135}11361137static ssize_t hl_data_read32(struct file *f, char __user *buf,1138size_t count, loff_t *ppos)1139{1140struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1141struct hl_device *hdev = entry->hdev;1142u64 value64, addr = entry->addr;1143char tmp_buf[32];1144ssize_t rc;1145u32 val;11461147if (hdev->reset_info.in_reset) {1148dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");1149return 0;1150}11511152if (*ppos)1153return 0;11541155rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32);1156if (rc)1157return rc;11581159val = value64; /* downcast back to 32 */11601161sprintf(tmp_buf, "0x%08x\n", val);1162return simple_read_from_buffer(buf, count, ppos, tmp_buf,1163strlen(tmp_buf));1164}11651166static ssize_t hl_data_write32(struct file *f, const char __user *buf,1167size_t count, loff_t *ppos)1168{1169struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1170struct hl_device *hdev = entry->hdev;1171u64 value64, addr = entry->addr;1172u32 value;1173ssize_t rc;11741175if (hdev->reset_info.in_reset) {1176dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");1177return 0;1178}11791180rc = kstrtouint_from_user(buf, count, 16, &value);1181if (rc)1182return rc;11831184value64 = value;1185rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32);1186if (rc)1187return rc;11881189return count;1190}11911192static ssize_t hl_data_read64(struct file *f, char __user *buf,1193size_t count, loff_t *ppos)1194{1195struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1196struct hl_device *hdev = entry->hdev;1197u64 addr = entry->addr;1198char tmp_buf[32];1199ssize_t rc;1200u64 val;12011202if (hdev->reset_info.in_reset) {1203dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");1204return 0;1205}12061207if (*ppos)1208return 0;12091210rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64);1211if (rc)1212return rc;12131214sprintf(tmp_buf, "0x%016llx\n", val);1215return simple_read_from_buffer(buf, count, ppos, tmp_buf,1216strlen(tmp_buf));1217}12181219static ssize_t hl_data_write64(struct file *f, const char __user *buf,1220size_t count, loff_t *ppos)1221{1222struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1223struct hl_device *hdev = entry->hdev;1224u64 addr = entry->addr;1225u64 value;1226ssize_t rc;12271228if (hdev->reset_info.in_reset) {1229dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");1230return 0;1231}12321233rc = kstrtoull_from_user(buf, count, 16, &value);1234if (rc)1235return rc;12361237rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64);1238if (rc)1239return rc;12401241return count;1242}12431244static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,1245size_t count, loff_t *ppos)1246{1247struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1248struct hl_device *hdev = entry->hdev;1249u64 addr = entry->addr;1250ssize_t rc;1251u32 size;12521253if (hdev->reset_info.in_reset) {1254dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n");1255return 0;1256}1257rc = kstrtouint_from_user(buf, count, 16, &size);1258if (rc)1259return rc;12601261if (!size) {1262dev_err(hdev->dev, "DMA read failed. size can't be 0\n");1263return -EINVAL;1264}12651266if (size > SZ_128M) {1267dev_err(hdev->dev,1268"DMA read failed. size can't be larger than 128MB\n");1269return -EINVAL;1270}12711272if (!hl_is_device_internal_memory_va(hdev, addr, size)) {1273dev_err(hdev->dev,1274"DMA read failed. Invalid 0x%010llx + 0x%08x\n",1275addr, size);1276return -EINVAL;1277}12781279/* Free the previous allocation, if there was any */1280entry->data_dma_blob_desc.size = 0;1281vfree(entry->data_dma_blob_desc.data);12821283entry->data_dma_blob_desc.data = vmalloc(size);1284if (!entry->data_dma_blob_desc.data)1285return -ENOMEM;12861287rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,1288entry->data_dma_blob_desc.data);1289if (rc) {1290dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);1291vfree(entry->data_dma_blob_desc.data);1292entry->data_dma_blob_desc.data = NULL;1293return -EIO;1294}12951296entry->data_dma_blob_desc.size = size;12971298return count;1299}13001301static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,1302size_t count, loff_t *ppos)1303{1304struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1305struct hl_device *hdev = entry->hdev;1306u32 size, trig;1307ssize_t rc;13081309if (hdev->reset_info.in_reset) {1310dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");1311return 0;1312}1313rc = kstrtouint_from_user(buf, count, 10, &trig);1314if (rc)1315return rc;13161317if (trig != 1) {1318dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");1319return -EINVAL;1320}13211322size = sizeof(struct cpucp_monitor_dump);13231324/* Free the previous allocation, if there was any */1325entry->mon_dump_blob_desc.size = 0;1326vfree(entry->mon_dump_blob_desc.data);13271328entry->mon_dump_blob_desc.data = vmalloc(size);1329if (!entry->mon_dump_blob_desc.data)1330return -ENOMEM;13311332rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);1333if (rc) {1334dev_err(hdev->dev, "Failed to dump monitors\n");1335vfree(entry->mon_dump_blob_desc.data);1336entry->mon_dump_blob_desc.data = NULL;1337return -EIO;1338}13391340entry->mon_dump_blob_desc.size = size;13411342return count;1343}13441345static ssize_t hl_get_power_state(struct file *f, char __user *buf,1346size_t count, loff_t *ppos)1347{1348struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1349struct hl_device *hdev = entry->hdev;1350char tmp_buf[200];1351int i;13521353if (*ppos)1354return 0;13551356if (hdev->pdev->current_state == PCI_D0)1357i = 1;1358else if (hdev->pdev->current_state == PCI_D3hot)1359i = 2;1360else1361i = 3;13621363sprintf(tmp_buf,1364"current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);1365return simple_read_from_buffer(buf, count, ppos, tmp_buf,1366strlen(tmp_buf));1367}13681369static ssize_t hl_set_power_state(struct file *f, const char __user *buf,1370size_t count, loff_t *ppos)1371{1372struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1373struct hl_device *hdev = entry->hdev;1374u32 value;1375ssize_t rc;13761377rc = kstrtouint_from_user(buf, count, 10, &value);1378if (rc)1379return rc;13801381if (value == 1) {1382pci_set_power_state(hdev->pdev, PCI_D0);1383pci_restore_state(hdev->pdev);1384rc = pci_enable_device(hdev->pdev);1385if (rc < 0)1386return rc;1387} else if (value == 2) {1388pci_save_state(hdev->pdev);1389pci_disable_device(hdev->pdev);1390pci_set_power_state(hdev->pdev, PCI_D3hot);1391} else {1392dev_dbg(hdev->dev, "invalid power state value %u\n", value);1393return -EINVAL;1394}13951396return count;1397}13981399static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,1400size_t count, loff_t *ppos)1401{1402struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1403struct hl_device *hdev = entry->hdev;1404char tmp_buf[32];1405u64 val;1406ssize_t rc;14071408if (*ppos)1409return 0;14101411rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,1412entry->i2c_reg, entry->i2c_len, &val);1413if (rc) {1414dev_err(hdev->dev,1415"Failed to read from I2C bus %d, addr %d, reg %d, len %d\n",1416entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);1417return rc;1418}14191420sprintf(tmp_buf, "%#02llx\n", val);1421rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,1422strlen(tmp_buf));14231424return rc;1425}14261427static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,1428size_t count, loff_t *ppos)1429{1430struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1431struct hl_device *hdev = entry->hdev;1432u64 value;1433ssize_t rc;14341435rc = kstrtou64_from_user(buf, count, 16, &value);1436if (rc)1437return rc;14381439rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,1440entry->i2c_reg, entry->i2c_len, value);1441if (rc) {1442dev_err(hdev->dev,1443"Failed to write %#02llx to I2C bus %d, addr %d, reg %d, len %d\n",1444value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);1445return rc;1446}14471448return count;1449}14501451static ssize_t hl_led0_write(struct file *f, const char __user *buf,1452size_t count, loff_t *ppos)1453{1454struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1455struct hl_device *hdev = entry->hdev;1456u32 value;1457ssize_t rc;14581459rc = kstrtouint_from_user(buf, count, 10, &value);1460if (rc)1461return rc;14621463value = value ? 1 : 0;14641465hl_debugfs_led_set(hdev, 0, value);14661467return count;1468}14691470static ssize_t hl_led1_write(struct file *f, const char __user *buf,1471size_t count, loff_t *ppos)1472{1473struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1474struct hl_device *hdev = entry->hdev;1475u32 value;1476ssize_t rc;14771478rc = kstrtouint_from_user(buf, count, 10, &value);1479if (rc)1480return rc;14811482value = value ? 1 : 0;14831484hl_debugfs_led_set(hdev, 1, value);14851486return count;1487}14881489static ssize_t hl_led2_write(struct file *f, const char __user *buf,1490size_t count, loff_t *ppos)1491{1492struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1493struct hl_device *hdev = entry->hdev;1494u32 value;1495ssize_t rc;14961497rc = kstrtouint_from_user(buf, count, 10, &value);1498if (rc)1499return rc;15001501value = value ? 1 : 0;15021503hl_debugfs_led_set(hdev, 2, value);15041505return count;1506}15071508static ssize_t hl_device_read(struct file *f, char __user *buf,1509size_t count, loff_t *ppos)1510{1511static const char *help =1512"Valid values: disable, enable, suspend, resume, cpu_timeout\n";1513return simple_read_from_buffer(buf, count, ppos, help, strlen(help));1514}15151516static ssize_t hl_device_write(struct file *f, const char __user *buf,1517size_t count, loff_t *ppos)1518{1519struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1520struct hl_device *hdev = entry->hdev;1521char data[30] = {0};15221523/* don't allow partial writes */1524if (*ppos != 0)1525return 0;15261527simple_write_to_buffer(data, 29, ppos, buf, count);15281529if (strncmp("disable", data, strlen("disable")) == 0) {1530hdev->disabled = true;1531} else if (strncmp("enable", data, strlen("enable")) == 0) {1532hdev->disabled = false;1533} else if (strncmp("suspend", data, strlen("suspend")) == 0) {1534hdev->asic_funcs->suspend(hdev);1535} else if (strncmp("resume", data, strlen("resume")) == 0) {1536hdev->asic_funcs->resume(hdev);1537} else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {1538hdev->device_cpu_disabled = true;1539} else {1540dev_err(hdev->dev,1541"Valid values: disable, enable, suspend, resume, cpu_timeout\n");1542count = -EINVAL;1543}15441545return count;1546}15471548static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,1549size_t count, loff_t *ppos)1550{1551return 0;1552}15531554static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,1555size_t count, loff_t *ppos)1556{1557return count;1558}15591560static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,1561size_t count, loff_t *ppos)1562{1563struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1564struct hl_device *hdev = entry->hdev;1565char tmp_buf[200];1566ssize_t rc;15671568if (!hdev->asic_prop.configurable_stop_on_err)1569return -EOPNOTSUPP;15701571if (*ppos)1572return 0;15731574sprintf(tmp_buf, "%d\n", hdev->stop_on_err);1575rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,1576strlen(tmp_buf) + 1);15771578return rc;1579}15801581static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,1582size_t count, loff_t *ppos)1583{1584struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1585struct hl_device *hdev = entry->hdev;1586u32 value;1587ssize_t rc;15881589if (!hdev->asic_prop.configurable_stop_on_err)1590return -EOPNOTSUPP;15911592if (hdev->reset_info.in_reset) {1593dev_warn_ratelimited(hdev->dev,1594"Can't change stop on error during reset\n");1595return 0;1596}15971598rc = kstrtouint_from_user(buf, count, 10, &value);1599if (rc)1600return rc;16011602hdev->stop_on_err = value ? 1 : 0;16031604hl_device_reset(hdev, 0);16051606return count;1607}16081609static ssize_t hl_security_violations_read(struct file *f, char __user *buf,1610size_t count, loff_t *ppos)1611{1612struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1613struct hl_device *hdev = entry->hdev;16141615hdev->asic_funcs->ack_protection_bits_errors(hdev);16161617return 0;1618}16191620static ssize_t hl_state_dump_read(struct file *f, char __user *buf,1621size_t count, loff_t *ppos)1622{1623struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1624ssize_t rc;16251626down_read(&entry->state_dump_sem);1627if (!entry->state_dump[entry->state_dump_head])1628rc = 0;1629else1630rc = simple_read_from_buffer(1631buf, count, ppos,1632entry->state_dump[entry->state_dump_head],1633strlen(entry->state_dump[entry->state_dump_head]));1634up_read(&entry->state_dump_sem);16351636return rc;1637}16381639static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,1640size_t count, loff_t *ppos)1641{1642struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1643struct hl_device *hdev = entry->hdev;1644ssize_t rc;1645u32 size;1646int i;16471648rc = kstrtouint_from_user(buf, count, 10, &size);1649if (rc)1650return rc;16511652if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {1653dev_err(hdev->dev, "Invalid number of dumps to skip\n");1654return -EINVAL;1655}16561657if (entry->state_dump[entry->state_dump_head]) {1658down_write(&entry->state_dump_sem);1659for (i = 0; i < size; ++i) {1660vfree(entry->state_dump[entry->state_dump_head]);1661entry->state_dump[entry->state_dump_head] = NULL;1662if (entry->state_dump_head > 0)1663entry->state_dump_head--;1664else1665entry->state_dump_head =1666ARRAY_SIZE(entry->state_dump) - 1;1667}1668up_write(&entry->state_dump_sem);1669}16701671return count;1672}16731674static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,1675size_t count, loff_t *ppos)1676{1677struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1678struct hl_device *hdev = entry->hdev;1679char tmp_buf[200];1680ssize_t rc;16811682if (*ppos)1683return 0;16841685sprintf(tmp_buf, "%d\n",1686jiffies_to_msecs(hdev->timeout_jiffies) / 1000);1687rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,1688strlen(tmp_buf) + 1);16891690return rc;1691}16921693static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,1694size_t count, loff_t *ppos)1695{1696struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1697struct hl_device *hdev = entry->hdev;1698u32 value;1699ssize_t rc;17001701rc = kstrtouint_from_user(buf, count, 10, &value);1702if (rc)1703return rc;17041705if (value)1706hdev->timeout_jiffies = secs_to_jiffies(value);1707else1708hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;17091710return count;1711}17121713static ssize_t hl_check_razwi_happened(struct file *f, char __user *buf,1714size_t count, loff_t *ppos)1715{1716struct hl_dbg_device_entry *entry = file_inode(f)->i_private;1717struct hl_device *hdev = entry->hdev;17181719hdev->asic_funcs->check_if_razwi_happened(hdev);17201721return 0;1722}17231724static const struct file_operations hl_mem_scrub_fops = {1725.owner = THIS_MODULE,1726.write = hl_memory_scrub,1727};17281729static const struct file_operations hl_data32b_fops = {1730.owner = THIS_MODULE,1731.read = hl_data_read32,1732.write = hl_data_write321733};17341735static const struct file_operations hl_data64b_fops = {1736.owner = THIS_MODULE,1737.read = hl_data_read64,1738.write = hl_data_write641739};17401741static const struct file_operations hl_dma_size_fops = {1742.owner = THIS_MODULE,1743.write = hl_dma_size_write1744};17451746static const struct file_operations hl_monitor_dump_fops = {1747.owner = THIS_MODULE,1748.write = hl_monitor_dump_trigger1749};17501751static const struct file_operations hl_i2c_data_fops = {1752.owner = THIS_MODULE,1753.read = hl_i2c_data_read,1754.write = hl_i2c_data_write1755};17561757static const struct file_operations hl_power_fops = {1758.owner = THIS_MODULE,1759.read = hl_get_power_state,1760.write = hl_set_power_state1761};17621763static const struct file_operations hl_led0_fops = {1764.owner = THIS_MODULE,1765.write = hl_led0_write1766};17671768static const struct file_operations hl_led1_fops = {1769.owner = THIS_MODULE,1770.write = hl_led1_write1771};17721773static const struct file_operations hl_led2_fops = {1774.owner = THIS_MODULE,1775.write = hl_led2_write1776};17771778static const struct file_operations hl_device_fops = {1779.owner = THIS_MODULE,1780.read = hl_device_read,1781.write = hl_device_write1782};17831784static const struct file_operations hl_clk_gate_fops = {1785.owner = THIS_MODULE,1786.read = hl_clk_gate_read,1787.write = hl_clk_gate_write1788};17891790static const struct file_operations hl_stop_on_err_fops = {1791.owner = THIS_MODULE,1792.read = hl_stop_on_err_read,1793.write = hl_stop_on_err_write1794};17951796static const struct file_operations hl_security_violations_fops = {1797.owner = THIS_MODULE,1798.read = hl_security_violations_read1799};18001801static const struct file_operations hl_state_dump_fops = {1802.owner = THIS_MODULE,1803.read = hl_state_dump_read,1804.write = hl_state_dump_write1805};18061807static const struct file_operations hl_timeout_locked_fops = {1808.owner = THIS_MODULE,1809.read = hl_timeout_locked_read,1810.write = hl_timeout_locked_write1811};18121813static const struct file_operations hl_razwi_check_fops = {1814.owner = THIS_MODULE,1815.read = hl_check_razwi_happened1816};18171818static const struct hl_info_list hl_debugfs_list[] = {1819{"command_buffers", command_buffers_show, NULL},1820{"command_submission", command_submission_show, NULL},1821{"command_submission_jobs", command_submission_jobs_show, NULL},1822{"userptr", userptr_show, NULL},1823{"vm", vm_show, NULL},1824{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},1825{"mmu", mmu_show, mmu_asid_va_write},1826{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},1827{"engines", engines_show, NULL},1828#ifdef CONFIG_HL_HLDIO1829/* DIO entries - only created if NVMe is supported */1830{"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write},1831{"dio_stats", dio_stats_show, NULL},1832{"dio_reset", dio_reset_show, dio_reset_write},1833{"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write},1834#endif1835};18361837static int hl_debugfs_open(struct inode *inode, struct file *file)1838{1839struct hl_debugfs_entry *node = inode->i_private;18401841return single_open(file, node->info_ent->show, node);1842}18431844static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,1845size_t count, loff_t *f_pos)1846{1847struct hl_debugfs_entry *node = file->f_inode->i_private;18481849if (node->info_ent->write)1850return node->info_ent->write(file, buf, count, f_pos);1851else1852return -EINVAL;18531854}18551856static const struct file_operations hl_debugfs_fops = {1857.owner = THIS_MODULE,1858.open = hl_debugfs_open,1859.read = seq_read,1860.write = hl_debugfs_write,1861.llseek = seq_lseek,1862.release = single_release,1863};18641865static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root)1866{1867debugfs_create_u8("i2c_bus",18680644,1869root,1870&dev_entry->i2c_bus);18711872debugfs_create_u8("i2c_addr",18730644,1874root,1875&dev_entry->i2c_addr);18761877debugfs_create_u8("i2c_reg",18780644,1879root,1880&dev_entry->i2c_reg);18811882debugfs_create_u8("i2c_len",18830644,1884root,1885&dev_entry->i2c_len);18861887debugfs_create_file("i2c_data",18880644,1889root,1890dev_entry,1891&hl_i2c_data_fops);18921893debugfs_create_file("led0",18940200,1895root,1896dev_entry,1897&hl_led0_fops);18981899debugfs_create_file("led1",19000200,1901root,1902dev_entry,1903&hl_led1_fops);19041905debugfs_create_file("led2",19060200,1907root,1908dev_entry,1909&hl_led2_fops);1910}19111912static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry,1913struct dentry *root)1914{1915int count = ARRAY_SIZE(hl_debugfs_list);1916struct hl_debugfs_entry *entry;1917int i;19181919debugfs_create_x64("memory_scrub_val",19200644,1921root,1922&hdev->memory_scrub_val);19231924debugfs_create_file("memory_scrub",19250200,1926root,1927dev_entry,1928&hl_mem_scrub_fops);19291930debugfs_create_x64("addr",19310644,1932root,1933&dev_entry->addr);19341935debugfs_create_file("data32",19360644,1937root,1938dev_entry,1939&hl_data32b_fops);19401941debugfs_create_file("data64",19420644,1943root,1944dev_entry,1945&hl_data64b_fops);19461947debugfs_create_file("set_power_state",19480644,1949root,1950dev_entry,1951&hl_power_fops);19521953debugfs_create_file("device",19540644,1955root,1956dev_entry,1957&hl_device_fops);19581959debugfs_create_file("clk_gate",19600644,1961root,1962dev_entry,1963&hl_clk_gate_fops);19641965debugfs_create_file("stop_on_err",19660644,1967root,1968dev_entry,1969&hl_stop_on_err_fops);19701971debugfs_create_file("dump_security_violations",19720400,1973root,1974dev_entry,1975&hl_security_violations_fops);19761977debugfs_create_file("dump_razwi_events",19780400,1979root,1980dev_entry,1981&hl_razwi_check_fops);19821983debugfs_create_file("dma_size",19840200,1985root,1986dev_entry,1987&hl_dma_size_fops);19881989debugfs_create_blob("data_dma",19900400,1991root,1992&dev_entry->data_dma_blob_desc);19931994debugfs_create_file("monitor_dump_trig",19950200,1996root,1997dev_entry,1998&hl_monitor_dump_fops);19992000debugfs_create_blob("monitor_dump",20010400,2002root,2003&dev_entry->mon_dump_blob_desc);20042005debugfs_create_x8("skip_reset_on_timeout",20060644,2007root,2008&hdev->reset_info.skip_reset_on_timeout);20092010debugfs_create_file("state_dump",20110644,2012root,2013dev_entry,2014&hl_state_dump_fops);20152016debugfs_create_file("timeout_locked",20170644,2018root,2019dev_entry,2020&hl_timeout_locked_fops);20212022debugfs_create_u32("device_release_watchdog_timeout",20230644,2024root,2025&hdev->device_release_watchdog_timeout_sec);20262027debugfs_create_u16("server_type",20280444,2029root,2030&hdev->asic_prop.server_type);20312032for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {2033/* Skip DIO entries if NVMe is not supported */2034if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 &&2035!hdev->asic_prop.supports_nvme)2036continue;20372038debugfs_create_file(hl_debugfs_list[i].name,20390644,2040root,2041entry,2042&hl_debugfs_fops);2043entry->info_ent = &hl_debugfs_list[i];2044entry->dev_entry = dev_entry;2045}2046}20472048int hl_debugfs_device_init(struct hl_device *hdev)2049{2050struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;2051int count = ARRAY_SIZE(hl_debugfs_list);20522053dev_entry->hdev = hdev;2054dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL);2055if (!dev_entry->entry_arr)2056return -ENOMEM;20572058dev_entry->data_dma_blob_desc.size = 0;2059dev_entry->data_dma_blob_desc.data = NULL;2060dev_entry->mon_dump_blob_desc.size = 0;2061dev_entry->mon_dump_blob_desc.data = NULL;20622063INIT_LIST_HEAD(&dev_entry->file_list);2064INIT_LIST_HEAD(&dev_entry->cb_list);2065INIT_LIST_HEAD(&dev_entry->cs_list);2066INIT_LIST_HEAD(&dev_entry->cs_job_list);2067INIT_LIST_HEAD(&dev_entry->userptr_list);2068INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);2069mutex_init(&dev_entry->file_mutex);2070init_rwsem(&dev_entry->state_dump_sem);2071spin_lock_init(&dev_entry->cb_spinlock);2072spin_lock_init(&dev_entry->cs_spinlock);2073spin_lock_init(&dev_entry->cs_job_spinlock);2074spin_lock_init(&dev_entry->userptr_spinlock);2075mutex_init(&dev_entry->ctx_mem_hash_mutex);20762077spin_lock_init(&hdev->debugfs_cfg_accesses.lock);2078hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */20792080#ifdef CONFIG_HL_HLDIO2081/* Initialize DIO statistics */2082memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));2083#endif20842085return 0;2086}20872088void hl_debugfs_device_fini(struct hl_device *hdev)2089{2090struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;2091int i;20922093mutex_destroy(&entry->ctx_mem_hash_mutex);2094mutex_destroy(&entry->file_mutex);20952096vfree(entry->data_dma_blob_desc.data);2097vfree(entry->mon_dump_blob_desc.data);20982099for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)2100vfree(entry->state_dump[i]);21012102kfree(entry->entry_arr);21032104}21052106void hl_debugfs_add_device(struct hl_device *hdev)2107{2108struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;21092110dev_entry->root = hdev->drm.accel->debugfs_root;21112112add_files_to_device(hdev, dev_entry, dev_entry->root);21132114if (!hdev->asic_prop.fw_security_enabled)2115add_secured_nodes(dev_entry, dev_entry->root);21162117}21182119void hl_debugfs_add_file(struct hl_fpriv *hpriv)2120{2121struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;21222123mutex_lock(&dev_entry->file_mutex);2124list_add(&hpriv->debugfs_list, &dev_entry->file_list);2125mutex_unlock(&dev_entry->file_mutex);2126}21272128void hl_debugfs_remove_file(struct hl_fpriv *hpriv)2129{2130struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;21312132mutex_lock(&dev_entry->file_mutex);2133list_del(&hpriv->debugfs_list);2134mutex_unlock(&dev_entry->file_mutex);2135}21362137void hl_debugfs_add_cb(struct hl_cb *cb)2138{2139struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;21402141spin_lock(&dev_entry->cb_spinlock);2142list_add(&cb->debugfs_list, &dev_entry->cb_list);2143spin_unlock(&dev_entry->cb_spinlock);2144}21452146void hl_debugfs_remove_cb(struct hl_cb *cb)2147{2148struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;21492150spin_lock(&dev_entry->cb_spinlock);2151list_del(&cb->debugfs_list);2152spin_unlock(&dev_entry->cb_spinlock);2153}21542155void hl_debugfs_add_cs(struct hl_cs *cs)2156{2157struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;21582159spin_lock(&dev_entry->cs_spinlock);2160list_add(&cs->debugfs_list, &dev_entry->cs_list);2161spin_unlock(&dev_entry->cs_spinlock);2162}21632164void hl_debugfs_remove_cs(struct hl_cs *cs)2165{2166struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;21672168spin_lock(&dev_entry->cs_spinlock);2169list_del(&cs->debugfs_list);2170spin_unlock(&dev_entry->cs_spinlock);2171}21722173void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)2174{2175struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;21762177spin_lock(&dev_entry->cs_job_spinlock);2178list_add(&job->debugfs_list, &dev_entry->cs_job_list);2179spin_unlock(&dev_entry->cs_job_spinlock);2180}21812182void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)2183{2184struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;21852186spin_lock(&dev_entry->cs_job_spinlock);2187list_del(&job->debugfs_list);2188spin_unlock(&dev_entry->cs_job_spinlock);2189}21902191void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)2192{2193struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;21942195spin_lock(&dev_entry->userptr_spinlock);2196list_add(&userptr->debugfs_list, &dev_entry->userptr_list);2197spin_unlock(&dev_entry->userptr_spinlock);2198}21992200void hl_debugfs_remove_userptr(struct hl_device *hdev,2201struct hl_userptr *userptr)2202{2203struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;22042205spin_lock(&dev_entry->userptr_spinlock);2206list_del(&userptr->debugfs_list);2207spin_unlock(&dev_entry->userptr_spinlock);2208}22092210void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)2211{2212struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;22132214mutex_lock(&dev_entry->ctx_mem_hash_mutex);2215list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);2216mutex_unlock(&dev_entry->ctx_mem_hash_mutex);2217}22182219void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)2220{2221struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;22222223mutex_lock(&dev_entry->ctx_mem_hash_mutex);2224list_del(&ctx->debugfs_list);2225mutex_unlock(&dev_entry->ctx_mem_hash_mutex);2226}22272228/**2229* hl_debugfs_set_state_dump - register state dump making it accessible via2230* debugfs2231* @hdev: pointer to the device structure2232* @data: the actual dump data2233* @length: the length of the data2234*/2235void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,2236unsigned long length)2237{2238struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;22392240down_write(&dev_entry->state_dump_sem);22412242dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %2243ARRAY_SIZE(dev_entry->state_dump);2244vfree(dev_entry->state_dump[dev_entry->state_dump_head]);2245dev_entry->state_dump[dev_entry->state_dump_head] = data;22462247up_write(&dev_entry->state_dump_sem);2248}2249225022512252