Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_release(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = kfd_open,
	.release = kfd_release,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
struct device *kfd_device;
static const struct class kfd_class = {
	.name = kfd_dev_name,
};

static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, gpu_id);

	if (pdd)
		return pdd;

	mutex_unlock(&p->mutex);
	return NULL;
}

static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
{
	mutex_unlock(&pdd->process->mutex);
}

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	err = class_register(&kfd_class);
	if (err)
		goto err_class_create;

	kfd_device = device_create(&kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_unregister(&kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_unregister(&kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
	kfd_device = NULL;
}
static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_process_init_cwsr_apu(process, filep)) {
		kfd_unref_process(process);
		return -EFAULT;
	}

	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

	dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
		process->lead_thread->pid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_release(struct inode *inode, struct file *filep)
{
	struct kfd_process *process = filep->private_data;

	if (!process)
		return 0;

	if (process->context_id != KFD_CONTEXT_ID_PRIMARY)
		kfd_process_notifier_release_internal(process);

	kfd_unref_process(process);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}
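/*
 * Illustrative user-space sketch (not part of this driver): a minimal
 * caller of the version query above would look roughly like this,
 * assuming <linux/kfd_ioctl.h> is available and /dev/kfd exists:
 *
 *	struct kfd_ioctl_get_version_args args = {0};
 *	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       args.major_version, args.minor_version);
 */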
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) {
		pr_err("Metadata ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok((const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok((const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok((const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->is_gws = false;
	q_properties->queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->metadata_queue_size = args->metadata_ring_size;

	q_properties->read_ptr = (void __user *)args->read_pointer_address;
	q_properties->write_ptr = (void __user *)args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	q_properties->sdma_engine_id = args->sdma_engine_id;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_node *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;
	uint32_t doorbell_offset_in_process = 0;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
				      kfd_get_num_xgmi_sdma_engines(dev) - 1;

		if (q_properties.sdma_engine_id > max_sdma_eng_id) {
			err = -EINVAL;
			pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
			       q_properties.sdma_engine_id, max_sdma_eng_id);
			goto err_sdma_engine_id;
		}
	}

	if (!pdd->qpd.proc_doorbells) {
		err = kfd_alloc_process_doorbells(dev->kfd, pdd);
		if (err) {
			pr_debug("failed to allocate process doorbells\n");
			goto err_bind_process;
		}
	}

	err = kfd_queue_acquire_buffers(pdd, &q_properties);
	if (err) {
		pr_debug("failed to acquire user queue buffers\n");
		goto err_acquire_queue_buf;
	}

	pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
			p->lead_thread->pid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
			NULL, NULL, NULL, &doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	if (KFD_IS_SOC15(dev))
		/* On SOC15 ASICs, include the doorbell offset within the
		 * process doorbell frame, which is 2 pages.
		 */
		args->doorbell_offset |= doorbell_offset_in_process;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
	return 0;

err_create_queue:
	kfd_queue_unref_bo_vas(pdd, &q_properties);
	kfd_queue_release_buffers(pdd, &q_properties);
err_acquire_queue_buf:
err_sdma_engine_id:
err_bind_process:
err_pdd:
	mutex_unlock(&p->mutex);
	return err;
}
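/*
 * Illustrative user-space sketch (not part of this driver): the
 * doorbell_offset returned above is intended to be passed straight to
 * mmap() on the open /dev/kfd descriptor to map the process doorbell
 * page, e.g. (kfd_fd and doorbell_size are assumptions for the
 * example; the doorbell slice size normally comes from topology):
 *
 *	void *doorbells = mmap(NULL, doorbell_size,
 *			       PROT_READ | PROT_WRITE, MAP_SHARED,
 *			       kfd_fd, args.doorbell_offset);
 */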
static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for process pid %d\n",
				args->queue_id,
				p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for process pid %d\n",
			args->queue_id, p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}
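/*
 * Illustrative sketch (not part of this driver): given the repurposed
 * layout checked above (bits 0-7 carry the queue percentage, bits 8-15
 * the PM4 target XCC), a caller would pack the field roughly as:
 *
 *	args.queue_percentage = (pct & 0xFF) | ((target_xcc & 0xFF) << 8);
 *
 * where pct and target_xcc are caller-chosen values.
 */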
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct mqd_update_info minfo = {0};
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	minfo.cu_mask.count = args->num_cu_mask;
	if (minfo.cu_mask.count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (minfo.cu_mask.count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		minfo.cu_mask.count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
	if (IS_ERR(minfo.cu_mask.ptr)) {
		pr_debug("Could not copy CU mask from userspace");
		return PTR_ERR(minfo.cu_mask.ptr);
	}

	mutex_lock(&p->mutex);

	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);

	mutex_unlock(&p->mutex);

	kfree(minfo.cu_mask.ptr);
	return retval;
}
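/*
 * Illustrative user-space sketch (not part of this driver): as checked
 * above, num_cu_mask must be a non-zero multiple of 32 and the mask is
 * read as num_cu_mask / 32 32-bit words. A caller enabling the first
 * 64 compute units might do roughly (kfd_fd and queue_id are assumed
 * to come from earlier calls):
 *
 *	uint32_t mask[2] = { 0xFFFFFFFF, 0xFFFFFFFF };
 *	struct kfd_ioctl_set_cu_mask_args args = {
 *		.queue_id = queue_id,
 *		.num_cu_mask = 64,
 *		.cu_mask_ptr = (uintptr_t)mask,
 *	};
 *	ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
 */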
static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size,
				args->misc_process_flag))
		err = -EINVAL;

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	return -EPERM;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	return -EPERM;
}
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	mutex_unlock(&p->mutex);
	if (pdd)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boottime_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	int i;

	dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);
	/* Run over all pdd of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		pAperture =
			&args->process_apertures[args->num_of_nodes];
		pAperture->gpu_id = pdd->dev->id;
		pAperture->lds_base = pdd->lds_base;
		pAperture->lds_limit = pdd->lds_limit;
		pAperture->gpuvm_base = pdd->gpuvm_base;
		pAperture->gpuvm_limit = pdd->gpuvm_limit;
		pAperture->scratch_base = pdd->scratch_base;
		pAperture->scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"node id %u\n", args->num_of_nodes);
		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);

		if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
			break;
	}
	mutex_unlock(&p->mutex);

	return 0;
}
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	int ret;
	int i;

	dev_dbg(kfd_device, "get apertures for process pid %d",
			p->lead_thread->pid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can alloacate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);
		args->num_of_nodes = p->n_pdds;
		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
		     GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!p->n_pdds) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		pa[i].gpu_id = pdd->dev->id;
		pa[i].lds_base = pdd->lds_base;
		pa[i].lds_limit = pdd->lds_limit;
		pa[i].gpuvm_base = pdd->gpuvm_base;
		pa[i].gpuvm_limit = pdd->gpuvm_limit;
		pa[i].scratch_base = pdd->scratch_base;
		pa[i].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
	}
	mutex_unlock(&p->mutex);

	args->num_of_nodes = i;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(i * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		mutex_lock(&p->mutex);
		err = kfd_kmap_event_page(p, args->event_page_offset);
		mutex_unlock(&p->mutex);
		if (err)
			return err;
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;

	return kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			&args->timeout, &args->wait_result);
}
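/*
 * Illustrative user-space sketch (not part of this driver): waiting on
 * a single event would look roughly like this, assuming event_id came
 * back from AMDKFD_IOC_CREATE_EVENT and kfd_fd is the open /dev/kfd
 * descriptor (the timeout value is an example, in milliseconds):
 *
 *	struct kfd_event_data ev = { .event_id = event_id };
 *	struct kfd_ioctl_wait_events_args args = {
 *		.events_ptr = (uintptr_t)&ev,
 *		.num_events = 1,
 *		.wait_for_all = 1,
 *		.timeout = 1000,
 *	};
 *	ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args);
 */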
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_node *dev;
	long err;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->adev, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
err_pdd:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_process_device *pdd;
	struct tile_config config;
	int err = 0;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	mutex_unlock(&p->mutex);
	if (!pdd)
		return -EINVAL;

	amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct file *drm_file;
	int ret;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		ret = -EINVAL;
		goto err_pdd;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_drm_file;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;

	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
err_pdd:
err_drm_file:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}
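/*
 * Illustrative user-space sketch (not part of this driver): the drm_fd
 * passed to the ioctl above is an open render-node descriptor for the
 * same GPU, e.g. (the device path and gpu_id are assumptions for the
 * example):
 *
 *	int drm_fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
 *	struct kfd_ioctl_acquire_vm_args args = {
 *		.drm_fd = drm_fd,
 *		.gpu_id = gpu_id,
 *	};
 *	ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args);
 */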
bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
	if (dev->kfd->adev->debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->local_mem_info.local_mem_size_private == 0 &&
	    dev->local_mem_info.local_mem_size_public > 0)
		return true;

	if (dev->local_mem_info.local_mem_size_public == 0 &&
	    dev->kfd->adev->gmc.is_app_apu) {
		pr_debug("APP APU, Consider like a large bar system\n");
		return true;
	}

	return false;
}

static int kfd_ioctl_get_available_memory(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_available_memory_args *args = data;
	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);

	if (!pdd)
		return -EINVAL;
	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,
							pdd->dev->node_id);
	kfd_unlock_pdd(pdd);
	return 0;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_node *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	if (p->context_id != KFD_CONTEXT_ID_PRIMARY && (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
		pr_debug("USERPTR is not supported on non-primary kfd_process\n");

		return -EOPNOTSUPP;
	}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
	/* Flush pending deferred work to avoid racing with deferred actions
	 * from previous memory map changes (e.g. munmap).
	 */
	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
	mutex_lock(&p->svms.lock);
	mmap_write_unlock(current->mm);

	/* Skip a special case that allocates VRAM without VA,
	 * VA will be invalid of 0.
	 */
	if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
	    interval_tree_iter_first(&p->svms.objects,
				     args->va_addr >> PAGE_SHIFT,
				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
		pr_err("Address: 0x%llx already allocated by SVM\n",
			args->va_addr);
		mutex_unlock(&p->svms.lock);
		return -EADDRINUSE;
	}

	/* When register user buffer check if it has been registered by svm by
	 * buffer cpu virtual address.
	 */
	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
	    interval_tree_iter_first(&p->svms.objects,
				     args->mmap_offset >> PAGE_SHIFT,
				     (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {
		pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
			args->mmap_offset);
		mutex_unlock(&p->svms.lock);
		return -EADDRINUSE;
	}

	mutex_unlock(&p->svms.lock);
#endif
	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}

	dev = pdd->dev;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		err = -EINVAL;
		goto err_large_bar;
	}

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = kfd_get_process_doorbells(pdd);
		if (!offset) {
			err = -ENOMEM;
			goto err_unlock;
		}
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		if (args->size != PAGE_SIZE) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = dev->adev->rmmio_remap.bus_addr;
		if (!offset || (PAGE_SIZE > 4096)) {
			err = -ENOMEM;
			goto err_unlock;
		}
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->adev, args->va_addr, args->size,
		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
		flags, false);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	/* Update the VRAM usage count */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		uint64_t size = args->size;

		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
			size >>= 1;
		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	/* MMIO is mapped through kfd device
	 * Generate a kfd mmap offset
	 */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
		args->mmap_offset = KFD_MMAP_TYPE_MMIO
					| KFD_MMAP_GPU_ID(args->gpu_id);

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
err_pdd:
err_large_bar:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	int ret;
	uint64_t size =
0;12141215mutex_lock(&p->mutex);1216/*1217* Safeguard to prevent user space from freeing signal BO.1218* It will be freed at process termination.1219*/1220if (p->signal_handle && (p->signal_handle == args->handle)) {1221pr_err("Free signal BO is not allowed\n");1222ret = -EPERM;1223goto err_unlock;1224}12251226pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1227if (!pdd) {1228pr_err("Process device data doesn't exist\n");1229ret = -EINVAL;1230goto err_pdd;1231}12321233mem = kfd_process_device_translate_handle(1234pdd, GET_IDR_HANDLE(args->handle));1235if (!mem) {1236ret = -EINVAL;1237goto err_unlock;1238}12391240ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,1241(struct kgd_mem *)mem, pdd->drm_priv, &size);12421243/* If freeing the buffer failed, leave the handle in place for1244* clean-up during process tear-down.1245*/1246if (!ret)1247kfd_process_device_remove_obj_handle(1248pdd, GET_IDR_HANDLE(args->handle));12491250atomic64_sub(size, &pdd->vram_usage);12511252err_unlock:1253err_pdd:1254mutex_unlock(&p->mutex);1255return ret;1256}12571258static int kfd_ioctl_map_memory_to_gpu(struct file *filep,1259struct kfd_process *p, void *data)1260{1261struct kfd_ioctl_map_memory_to_gpu_args *args = data;1262struct kfd_process_device *pdd, *peer_pdd;1263void *mem;1264struct kfd_node *dev;1265long err = 0;1266int i;1267uint32_t *devices_arr = NULL;12681269if (!args->n_devices) {1270pr_debug("Device IDs array empty\n");1271return -EINVAL;1272}1273if (args->n_success > args->n_devices) {1274pr_debug("n_success exceeds n_devices\n");1275return -EINVAL;1276}12771278devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1279GFP_KERNEL);1280if (!devices_arr)1281return -ENOMEM;12821283err = copy_from_user(devices_arr,1284(void __user *)args->device_ids_array_ptr,1285args->n_devices * sizeof(*devices_arr));1286if (err != 0) {1287err = -EFAULT;1288goto copy_from_user_failed;1289}12901291mutex_lock(&p->mutex);1292pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1293if (!pdd) {1294err = -EINVAL;1295goto get_process_device_data_failed;1296}1297dev = pdd->dev;12981299pdd = kfd_bind_process_to_device(dev, p);1300if (IS_ERR(pdd)) {1301err = PTR_ERR(pdd);1302goto bind_process_to_device_failed;1303}13041305mem = kfd_process_device_translate_handle(pdd,1306GET_IDR_HANDLE(args->handle));1307if (!mem) {1308err = -ENOMEM;1309goto get_mem_obj_from_handle_failed;1310}13111312for (i = args->n_success; i < args->n_devices; i++) {1313peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1314if (!peer_pdd) {1315pr_debug("Getting device by id failed for 0x%x\n",1316devices_arr[i]);1317err = -EINVAL;1318goto get_mem_obj_from_handle_failed;1319}13201321peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);1322if (IS_ERR(peer_pdd)) {1323err = PTR_ERR(peer_pdd);1324goto get_mem_obj_from_handle_failed;1325}13261327err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(1328peer_pdd->dev->adev, (struct kgd_mem *)mem,1329peer_pdd->drm_priv);1330if (err) {1331struct pci_dev *pdev = peer_pdd->dev->adev->pdev;13321333dev_err(dev->adev->dev,1334"Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",1335pci_domain_nr(pdev->bus),1336pdev->bus->number,1337PCI_SLOT(pdev->devfn),1338PCI_FUNC(pdev->devfn),1339((struct kgd_mem *)mem)->domain);1340goto map_memory_to_gpu_failed;1341}1342args->n_success = i+1;1343}13441345err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);1346if (err) {1347pr_debug("Sync memory failed, wait interrupted by user signal\n");1348goto 
sync_memory_failed;1349}13501351mutex_unlock(&p->mutex);13521353/* Flush TLBs after waiting for the page table updates to complete */1354for (i = 0; i < args->n_devices; i++) {1355peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1356if (WARN_ON_ONCE(!peer_pdd))1357continue;1358kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);1359}1360kfree(devices_arr);13611362return err;13631364get_process_device_data_failed:1365bind_process_to_device_failed:1366get_mem_obj_from_handle_failed:1367map_memory_to_gpu_failed:1368sync_memory_failed:1369mutex_unlock(&p->mutex);1370copy_from_user_failed:1371kfree(devices_arr);13721373return err;1374}13751376static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,1377struct kfd_process *p, void *data)1378{1379struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;1380struct kfd_process_device *pdd, *peer_pdd;1381void *mem;1382long err = 0;1383uint32_t *devices_arr = NULL, i;1384bool flush_tlb;13851386if (!args->n_devices) {1387pr_debug("Device IDs array empty\n");1388return -EINVAL;1389}1390if (args->n_success > args->n_devices) {1391pr_debug("n_success exceeds n_devices\n");1392return -EINVAL;1393}13941395devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1396GFP_KERNEL);1397if (!devices_arr)1398return -ENOMEM;13991400err = copy_from_user(devices_arr,1401(void __user *)args->device_ids_array_ptr,1402args->n_devices * sizeof(*devices_arr));1403if (err != 0) {1404err = -EFAULT;1405goto copy_from_user_failed;1406}14071408mutex_lock(&p->mutex);1409pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1410if (!pdd) {1411err = -EINVAL;1412goto bind_process_to_device_failed;1413}14141415mem = kfd_process_device_translate_handle(pdd,1416GET_IDR_HANDLE(args->handle));1417if (!mem) {1418err = -ENOMEM;1419goto get_mem_obj_from_handle_failed;1420}14211422for (i = args->n_success; i < args->n_devices; i++) {1423peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1424if (!peer_pdd) {1425err = -EINVAL;1426goto get_mem_obj_from_handle_failed;1427}1428err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(1429peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);1430if (err) {1431pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);1432goto unmap_memory_from_gpu_failed;1433}1434args->n_success = i+1;1435}14361437flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);1438if (flush_tlb) {1439err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,1440(struct kgd_mem *) mem, true);1441if (err) {1442pr_debug("Sync memory failed, wait interrupted by user signal\n");1443goto sync_memory_failed;1444}1445}14461447/* Flush TLBs after waiting for the page table updates to complete */1448for (i = 0; i < args->n_devices; i++) {1449peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1450if (WARN_ON_ONCE(!peer_pdd))1451continue;1452if (flush_tlb)1453kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);14541455/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */1456err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);1457if (err)1458goto sync_memory_failed;1459}14601461mutex_unlock(&p->mutex);14621463kfree(devices_arr);14641465return 0;14661467bind_process_to_device_failed:1468get_mem_obj_from_handle_failed:1469unmap_memory_from_gpu_failed:1470sync_memory_failed:1471mutex_unlock(&p->mutex);1472copy_from_user_failed:1473kfree(devices_arr);1474return err;1475}14761477static int kfd_ioctl_alloc_queue_gws(struct file *filep,1478struct kfd_process *p, void *data)1479{1480int retval;1481struct 
kfd_ioctl_alloc_queue_gws_args *args = data;1482struct queue *q;1483struct kfd_node *dev;14841485mutex_lock(&p->mutex);1486q = pqm_get_user_queue(&p->pqm, args->queue_id);14871488if (q) {1489dev = q->device;1490} else {1491retval = -EINVAL;1492goto out_unlock;1493}14941495if (!dev->gws) {1496retval = -ENODEV;1497goto out_unlock;1498}14991500if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {1501retval = -ENODEV;1502goto out_unlock;1503}15041505if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||1506kfd_dbg_has_cwsr_workaround(dev))) {1507retval = -EBUSY;1508goto out_unlock;1509}15101511retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);1512mutex_unlock(&p->mutex);15131514args->first_gws = 0;1515return retval;15161517out_unlock:1518mutex_unlock(&p->mutex);1519return retval;1520}15211522static int kfd_ioctl_get_dmabuf_info(struct file *filep,1523struct kfd_process *p, void *data)1524{1525struct kfd_ioctl_get_dmabuf_info_args *args = data;1526struct kfd_node *dev = NULL;1527struct amdgpu_device *dmabuf_adev;1528void *metadata_buffer = NULL;1529uint32_t flags;1530int8_t xcp_id;1531unsigned int i;1532int r;15331534/* Find a KFD GPU device that supports the get_dmabuf_info query */1535for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)1536if (dev && !kfd_devcgroup_check_permission(dev))1537break;1538if (!dev)1539return -EINVAL;15401541if (args->metadata_ptr) {1542metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);1543if (!metadata_buffer)1544return -ENOMEM;1545}15461547/* Get dmabuf info from KGD */1548r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,1549&dmabuf_adev, &args->size,1550metadata_buffer, args->metadata_size,1551&args->metadata_size, &flags, &xcp_id);1552if (r)1553goto exit;15541555if (xcp_id >= 0)1556args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;1557else1558args->gpu_id = dev->id;1559args->flags = flags;15601561/* Copy metadata buffer to user mode */1562if (metadata_buffer) {1563r = copy_to_user((void __user *)args->metadata_ptr,1564metadata_buffer, args->metadata_size);1565if (r != 0)1566r = -EFAULT;1567}15681569exit:1570kfree(metadata_buffer);15711572return r;1573}15741575static int kfd_ioctl_import_dmabuf(struct file *filep,1576struct kfd_process *p, void *data)1577{1578struct kfd_ioctl_import_dmabuf_args *args = data;1579struct kfd_process_device *pdd;1580int idr_handle;1581uint64_t size;1582void *mem;1583int r;15841585mutex_lock(&p->mutex);1586pdd = kfd_process_device_data_by_id(p, args->gpu_id);1587if (!pdd) {1588r = -EINVAL;1589goto err_unlock;1590}15911592pdd = kfd_bind_process_to_device(pdd->dev, p);1593if (IS_ERR(pdd)) {1594r = PTR_ERR(pdd);1595goto err_unlock;1596}15971598r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,1599args->va_addr, pdd->drm_priv,1600(struct kgd_mem **)&mem, &size,1601NULL);1602if (r)1603goto err_unlock;16041605idr_handle = kfd_process_device_create_obj_handle(pdd, mem);1606if (idr_handle < 0) {1607r = -EFAULT;1608goto err_free;1609}16101611mutex_unlock(&p->mutex);16121613args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);16141615return 0;16161617err_free:1618amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,1619pdd->drm_priv, NULL);1620err_unlock:1621mutex_unlock(&p->mutex);1622return r;1623}16241625static int kfd_ioctl_export_dmabuf(struct file *filep,1626struct kfd_process *p, void *data)1627{1628struct kfd_ioctl_export_dmabuf_args *args = data;1629struct kfd_process_device *pdd;1630struct dma_buf 
*dmabuf;1631struct kfd_node *dev;1632void *mem;1633int ret = 0;16341635dev = kfd_device_by_id(GET_GPU_ID(args->handle));1636if (!dev)1637return -EINVAL;16381639mutex_lock(&p->mutex);16401641pdd = kfd_get_process_device_data(dev, p);1642if (!pdd) {1643ret = -EINVAL;1644goto err_unlock;1645}16461647mem = kfd_process_device_translate_handle(pdd,1648GET_IDR_HANDLE(args->handle));1649if (!mem) {1650ret = -EINVAL;1651goto err_unlock;1652}16531654ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1655mutex_unlock(&p->mutex);1656if (ret)1657goto err_out;16581659ret = dma_buf_fd(dmabuf, args->flags);1660if (ret < 0) {1661dma_buf_put(dmabuf);1662goto err_out;1663}1664/* dma_buf_fd assigns the reference count to the fd, no need to1665* put the reference here.1666*/1667args->dmabuf_fd = ret;16681669return 0;16701671err_unlock:1672mutex_unlock(&p->mutex);1673err_out:1674return ret;1675}16761677/* Handle requests for watching SMI events */1678static int kfd_ioctl_smi_events(struct file *filep,1679struct kfd_process *p, void *data)1680{1681struct kfd_ioctl_smi_events_args *args = data;1682struct kfd_process_device *pdd;16831684mutex_lock(&p->mutex);16851686pdd = kfd_process_device_data_by_id(p, args->gpuid);1687mutex_unlock(&p->mutex);1688if (!pdd)1689return -EINVAL;16901691return kfd_smi_event_open(pdd->dev, &args->anon_fd);1692}16931694#if IS_ENABLED(CONFIG_HSA_AMD_SVM)16951696static int kfd_ioctl_set_xnack_mode(struct file *filep,1697struct kfd_process *p, void *data)1698{1699struct kfd_ioctl_set_xnack_mode_args *args = data;1700int r = 0;17011702mutex_lock(&p->mutex);1703if (args->xnack_enabled >= 0) {1704if (!list_empty(&p->pqm.queues)) {1705pr_debug("Process has user queues running\n");1706r = -EBUSY;1707goto out_unlock;1708}17091710if (p->xnack_enabled == args->xnack_enabled)1711goto out_unlock;17121713if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {1714r = -EPERM;1715goto out_unlock;1716}17171718r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);1719} else {1720args->xnack_enabled = p->xnack_enabled;1721}17221723out_unlock:1724mutex_unlock(&p->mutex);17251726return r;1727}17281729static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1730{1731struct kfd_ioctl_svm_args *args = data;1732int r = 0;17331734if (p->context_id != KFD_CONTEXT_ID_PRIMARY) {1735pr_debug("SVM ioctl not supported on non-primary kfd process\n");17361737return -EOPNOTSUPP;1738}17391740pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",1741args->start_addr, args->size, args->op, args->nattr);17421743if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))1744return -EINVAL;1745if (!args->start_addr || !args->size)1746return -EINVAL;17471748r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,1749args->attrs);17501751return r;1752}1753#else1754static int kfd_ioctl_set_xnack_mode(struct file *filep,1755struct kfd_process *p, void *data)1756{1757return -EPERM;1758}1759static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1760{1761return -EPERM;1762}1763#endif17641765static int criu_checkpoint_process(struct kfd_process *p,1766uint8_t __user *user_priv_data,1767uint64_t *priv_offset)1768{1769struct kfd_criu_process_priv_data process_priv;1770int ret;17711772memset(&process_priv, 0, sizeof(process_priv));17731774process_priv.version = KFD_CRIU_PRIV_VERSION;1775/* For CR, we don't consider negative xnack mode which is used for1776* querying without changing it, here 0 simply means disabled and 11777* means enabled so retry for 
finding a valid PTE.1778*/1779process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;17801781ret = copy_to_user(user_priv_data + *priv_offset,1782&process_priv, sizeof(process_priv));17831784if (ret) {1785pr_err("Failed to copy process information to user\n");1786ret = -EFAULT;1787}17881789*priv_offset += sizeof(process_priv);1790return ret;1791}17921793static int criu_checkpoint_devices(struct kfd_process *p,1794uint32_t num_devices,1795uint8_t __user *user_addr,1796uint8_t __user *user_priv_data,1797uint64_t *priv_offset)1798{1799struct kfd_criu_device_priv_data *device_priv = NULL;1800struct kfd_criu_device_bucket *device_buckets = NULL;1801int ret = 0, i;18021803device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);1804if (!device_buckets) {1805ret = -ENOMEM;1806goto exit;1807}18081809device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);1810if (!device_priv) {1811ret = -ENOMEM;1812goto exit;1813}18141815for (i = 0; i < num_devices; i++) {1816struct kfd_process_device *pdd = p->pdds[i];18171818device_buckets[i].user_gpu_id = pdd->user_gpu_id;1819device_buckets[i].actual_gpu_id = pdd->dev->id;18201821/*1822* priv_data does not contain useful information for now and is reserved for1823* future use, so we do not set its contents.1824*/1825}18261827ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));1828if (ret) {1829pr_err("Failed to copy device information to user\n");1830ret = -EFAULT;1831goto exit;1832}18331834ret = copy_to_user(user_priv_data + *priv_offset,1835device_priv,1836num_devices * sizeof(*device_priv));1837if (ret) {1838pr_err("Failed to copy device information to user\n");1839ret = -EFAULT;1840}1841*priv_offset += num_devices * sizeof(*device_priv);18421843exit:1844kvfree(device_buckets);1845kvfree(device_priv);1846return ret;1847}18481849static uint32_t get_process_num_bos(struct kfd_process *p)1850{1851uint32_t num_of_bos = 0;1852int i;18531854/* Run over all PDDs of the process */1855for (i = 0; i < p->n_pdds; i++) {1856struct kfd_process_device *pdd = p->pdds[i];1857void *mem;1858int id;18591860idr_for_each_entry(&pdd->alloc_idr, mem, id) {1861struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;18621863if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)1864num_of_bos++;1865}1866}1867return num_of_bos;1868}18691870static int criu_get_prime_handle(struct kgd_mem *mem,1871int flags, u32 *shared_fd,1872struct file **file)1873{1874struct dma_buf *dmabuf;1875int ret;18761877ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1878if (ret) {1879pr_err("dmabuf export failed for the BO\n");1880return ret;1881}18821883ret = get_unused_fd_flags(flags);1884if (ret < 0) {1885pr_err("dmabuf create fd failed, ret:%d\n", ret);1886goto out_free_dmabuf;1887}18881889*shared_fd = ret;1890*file = dmabuf->file;1891return 0;18921893out_free_dmabuf:1894dma_buf_put(dmabuf);1895return ret;1896}18971898static void commit_files(struct file **files,1899struct kfd_criu_bo_bucket *bo_buckets,1900unsigned int count,1901int err)1902{1903while (count--) {1904struct file *file = files[count];19051906if (!file)1907continue;1908if (err) {1909fput(file);1910put_unused_fd(bo_buckets[count].dmabuf_fd);1911} else {1912fd_install(bo_buckets[count].dmabuf_fd, file);1913}1914}1915}19161917static int criu_checkpoint_bos(struct kfd_process *p,1918uint32_t num_bos,1919uint8_t __user *user_bos,1920uint8_t __user *user_priv_data,1921uint64_t *priv_offset)1922{1923struct kfd_criu_bo_bucket *bo_buckets;1924struct kfd_criu_bo_priv_data *bo_privs;1925struct 
file **files = NULL;1926int ret = 0, pdd_index, bo_index = 0, id;1927void *mem;19281929bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);1930if (!bo_buckets)1931return -ENOMEM;19321933bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);1934if (!bo_privs) {1935ret = -ENOMEM;1936goto exit;1937}19381939files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);1940if (!files) {1941ret = -ENOMEM;1942goto exit;1943}19441945for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {1946struct kfd_process_device *pdd = p->pdds[pdd_index];1947struct amdgpu_bo *dumper_bo;1948struct kgd_mem *kgd_mem;19491950idr_for_each_entry(&pdd->alloc_idr, mem, id) {1951struct kfd_criu_bo_bucket *bo_bucket;1952struct kfd_criu_bo_priv_data *bo_priv;1953int i, dev_idx = 0;19541955kgd_mem = (struct kgd_mem *)mem;1956dumper_bo = kgd_mem->bo;19571958/* Skip checkpointing BOs that are used for Trap handler1959* code and state. Currently, these BOs have a VA that1960* is less GPUVM Base1961*/1962if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)1963continue;19641965bo_bucket = &bo_buckets[bo_index];1966bo_priv = &bo_privs[bo_index];19671968bo_bucket->gpu_id = pdd->user_gpu_id;1969bo_bucket->addr = (uint64_t)kgd_mem->va;1970bo_bucket->size = amdgpu_bo_size(dumper_bo);1971bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;1972bo_priv->idr_handle = id;19731974if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {1975ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,1976&bo_priv->user_addr);1977if (ret) {1978pr_err("Failed to obtain user address for user-pointer bo\n");1979goto exit;1980}1981}1982if (bo_bucket->alloc_flags1983& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {1984ret = criu_get_prime_handle(kgd_mem,1985bo_bucket->alloc_flags &1986KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
DRM_RDWR : 0,1987&bo_bucket->dmabuf_fd, &files[bo_index]);1988if (ret)1989goto exit;1990} else {1991bo_bucket->dmabuf_fd = KFD_INVALID_FD;1992}19931994if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)1995bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |1996KFD_MMAP_GPU_ID(pdd->dev->id);1997else if (bo_bucket->alloc_flags &1998KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)1999bo_bucket->offset = KFD_MMAP_TYPE_MMIO |2000KFD_MMAP_GPU_ID(pdd->dev->id);2001else2002bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);20032004for (i = 0; i < p->n_pdds; i++) {2005if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))2006bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;2007}20082009pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"2010"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",2011bo_bucket->size,2012bo_bucket->addr,2013bo_bucket->offset,2014bo_bucket->gpu_id,2015bo_bucket->alloc_flags,2016bo_priv->idr_handle);2017bo_index++;2018}2019}20202021ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));2022if (ret) {2023pr_err("Failed to copy BO information to user\n");2024ret = -EFAULT;2025goto exit;2026}20272028ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));2029if (ret) {2030pr_err("Failed to copy BO priv information to user\n");2031ret = -EFAULT;2032goto exit;2033}20342035*priv_offset += num_bos * sizeof(*bo_privs);20362037exit:2038commit_files(files, bo_buckets, bo_index, ret);2039kvfree(files);2040kvfree(bo_buckets);2041kvfree(bo_privs);2042return ret;2043}20442045static int criu_get_process_object_info(struct kfd_process *p,2046uint32_t *num_devices,2047uint32_t *num_bos,2048uint32_t *num_objects,2049uint64_t *objs_priv_size)2050{2051uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;2052uint32_t num_queues, num_events, num_svm_ranges;2053int ret;20542055*num_devices = p->n_pdds;2056*num_bos = get_process_num_bos(p);20572058ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);2059if (ret)2060return ret;20612062num_events = kfd_get_num_events(p);20632064svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);20652066*num_objects = num_queues + num_events + num_svm_ranges;20672068if (objs_priv_size) {2069priv_size = sizeof(struct kfd_criu_process_priv_data);2070priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);2071priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);2072priv_size += queues_priv_data_size;2073priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);2074priv_size += svm_priv_data_size;2075*objs_priv_size = priv_size;2076}2077return 0;2078}20792080static int criu_checkpoint(struct file *filep,2081struct kfd_process *p,2082struct kfd_ioctl_criu_args *args)2083{2084int ret;2085uint32_t num_devices, num_bos, num_objects;2086uint64_t priv_size, priv_offset = 0, bo_priv_offset;20872088if (!args->devices || !args->bos || !args->priv_data)2089return -EINVAL;20902091mutex_lock(&p->mutex);20922093if (!p->n_pdds) {2094pr_err("No pdd for given process\n");2095ret = -ENODEV;2096goto exit_unlock;2097}20982099/* Confirm all process queues are evicted */2100if (!p->queues_paused) {2101pr_err("Cannot dump process when queues are not in evicted state\n");2102/* CRIU plugin did not call op PROCESS_INFO before checkpointing */2103ret = -EINVAL;2104goto exit_unlock;2105}21062107ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);2108if (ret)2109goto exit_unlock;21102111if (num_devices != 
args->num_devices ||2112num_bos != args->num_bos ||2113num_objects != args->num_objects ||2114priv_size != args->priv_data_size) {21152116ret = -EINVAL;2117goto exit_unlock;2118}21192120/* each function will store private data inside priv_data and adjust priv_offset */2121ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);2122if (ret)2123goto exit_unlock;21242125ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,2126(uint8_t __user *)args->priv_data, &priv_offset);2127if (ret)2128goto exit_unlock;21292130/* Leave room for BOs in the private data. They need to be restored2131* before events, but we checkpoint them last to simplify the error2132* handling.2133*/2134bo_priv_offset = priv_offset;2135priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);21362137if (num_objects) {2138ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,2139&priv_offset);2140if (ret)2141goto exit_unlock;21422143ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,2144&priv_offset);2145if (ret)2146goto exit_unlock;21472148ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);2149if (ret)2150goto exit_unlock;2151}21522153/* This must be the last thing in this function that can fail.2154* Otherwise we leak dmabuf file descriptors.2155*/2156ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,2157(uint8_t __user *)args->priv_data, &bo_priv_offset);21582159exit_unlock:2160mutex_unlock(&p->mutex);2161if (ret)2162pr_err("Failed to dump CRIU ret:%d\n", ret);2163else2164pr_debug("CRIU dump ret:%d\n", ret);21652166return ret;2167}21682169static int criu_restore_process(struct kfd_process *p,2170struct kfd_ioctl_criu_args *args,2171uint64_t *priv_offset,2172uint64_t max_priv_data_size)2173{2174int ret = 0;2175struct kfd_criu_process_priv_data process_priv;21762177if (*priv_offset + sizeof(process_priv) > max_priv_data_size)2178return -EINVAL;21792180ret = copy_from_user(&process_priv,2181(void __user *)(args->priv_data + *priv_offset),2182sizeof(process_priv));2183if (ret) {2184pr_err("Failed to copy process private information from user\n");2185ret = -EFAULT;2186goto exit;2187}2188*priv_offset += sizeof(process_priv);21892190if (process_priv.version != KFD_CRIU_PRIV_VERSION) {2191pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",2192process_priv.version, KFD_CRIU_PRIV_VERSION);2193return -EINVAL;2194}21952196pr_debug("Setting XNACK mode\n");2197if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {2198pr_err("xnack mode cannot be set\n");2199ret = -EPERM;2200goto exit;2201} else {2202pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);2203p->xnack_enabled = process_priv.xnack_mode;2204}22052206exit:2207return ret;2208}22092210static int criu_restore_devices(struct kfd_process *p,2211struct kfd_ioctl_criu_args *args,2212uint64_t *priv_offset,2213uint64_t max_priv_data_size)2214{2215struct kfd_criu_device_bucket *device_buckets;2216struct kfd_criu_device_priv_data *device_privs;2217int ret = 0;2218uint32_t i;22192220if (args->num_devices != p->n_pdds)2221return -EINVAL;22222223if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)2224return -EINVAL;22252226device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);2227if (!device_buckets)2228return -ENOMEM;22292230ret = copy_from_user(device_buckets, (void __user *)args->devices,2231args->num_devices * sizeof(*device_buckets));2232if (ret) 
	if (ret) {
		pr_err("Failed to copy devices buckets from user\n");
		ret = -EFAULT;
		goto exit;
	}

	for (i = 0; i < args->num_devices; i++) {
		struct kfd_node *dev;
		struct kfd_process_device *pdd;
		struct file *drm_file;

		/* device private data is not currently used */

		if (!device_buckets[i].user_gpu_id) {
			pr_err("Invalid user gpu_id\n");
			ret = -EINVAL;
			goto exit;
		}

		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
		if (!dev) {
			pr_err("Failed to find device with gpu_id = %x\n",
			       device_buckets[i].actual_gpu_id);
			ret = -EINVAL;
			goto exit;
		}

		pdd = kfd_get_process_device_data(dev, p);
		if (!pdd) {
			pr_err("Failed to get pdd for gpu_id = %x\n",
			       device_buckets[i].actual_gpu_id);
			ret = -EINVAL;
			goto exit;
		}
		pdd->user_gpu_id = device_buckets[i].user_gpu_id;

		drm_file = fget(device_buckets[i].drm_fd);
		if (!drm_file) {
			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
			       device_buckets[i].drm_fd);
			ret = -EINVAL;
			goto exit;
		}

		if (pdd->drm_file) {
			ret = -EINVAL;
			goto exit;
		}

		/* create the vm using render nodes for kfd pdd */
		if (kfd_process_device_init_vm(pdd, drm_file)) {
			pr_err("could not init vm for given pdd\n");
			/* On success, the PDD keeps the drm_file reference */
			fput(drm_file);
			ret = -EINVAL;
			goto exit;
		}
		/*
		 * The pdd already has its VM bound to the render node, so the
		 * call below won't create a new exclusive KFD mapping but will
		 * reuse the existing renderDXXX one. It is still needed for
		 * iommu v2 binding and runtime pm.
		 */
		pdd = kfd_bind_process_to_device(dev, p);
		if (IS_ERR(pdd)) {
			ret = PTR_ERR(pdd);
			goto exit;
		}

		if (!pdd->qpd.proc_doorbells) {
			ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
			if (ret)
				goto exit;
		}
	}

	/*
	 * We are not copying device private data from user as we are not using the data for now,
	 * but we still adjust for its private data.
	 */
	*priv_offset += args->num_devices * sizeof(*device_privs);

exit:
	kfree(device_buckets);
	return ret;
}

static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
				      struct kfd_criu_bo_bucket *bo_bucket,
				      struct kfd_criu_bo_priv_data *bo_priv,
				      struct kgd_mem **kgd_mem)
{
	int idr_handle;
	int ret;
	const bool criu_resume = true;
	u64 offset;

	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (bo_bucket->size !=
			kfd_doorbell_process_slice(pdd->dev->kfd))
			return -EINVAL;

		offset = kfd_get_process_doorbells(pdd);
		if (!offset)
			return -ENOMEM;
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		/* MMIO BOs need remapped bus address */
		if (bo_bucket->size != PAGE_SIZE) {
			pr_err("Invalid page size\n");
			return -EINVAL;
		}
		offset = pdd->dev->adev->rmmio_remap.bus_addr;
		if (!offset || (PAGE_SIZE > 4096)) {
			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
			return -ENOMEM;
		}
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		offset = bo_priv->user_addr;
	}
	/* Create the BO */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
						      bo_bucket->size, pdd->drm_priv, kgd_mem,
						      &offset, bo_bucket->alloc_flags, criu_resume);
	if (ret) {
		pr_err("Could not create the BO\n");
		return ret;
	}
	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
		 bo_bucket->size, bo_bucket->addr, offset);

	/* Restore previous IDR handle */
	pr_debug("Restoring old IDR handle for the BO");
	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
			       bo_priv->idr_handle + 1, GFP_KERNEL);

	if (idr_handle < 0) {
		pr_err("Could not allocate idr\n");
		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
						       NULL);
		return -ENOMEM;
	}

	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
		bo_bucket->restored_offset = offset;
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		bo_bucket->restored_offset = offset;
		/* Update the VRAM usage count */
		atomic64_add(bo_bucket->size, &pdd->vram_usage);
	}
	return 0;
}

static int criu_restore_bo(struct kfd_process *p,
			   struct kfd_criu_bo_bucket *bo_bucket,
			   struct kfd_criu_bo_priv_data *bo_priv,
			   struct file **file)
{
	struct kfd_process_device *pdd;
	struct kgd_mem *kgd_mem;
	int ret;
	int j;

	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
		 bo_priv->idr_handle);

	pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		return -ENODEV;
	}

	ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
	if (ret)
		return ret;

	/* now map these BOs to GPU/s */
	for (j = 0; j < p->n_pdds; j++) {
		struct kfd_node *peer;
		struct kfd_process_device *peer_pdd;

		if (!bo_priv->mapped_gpuids[j])
			break;

		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
		if (!peer_pdd)
			return -EINVAL;

		peer = peer_pdd->dev;

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd))
			return PTR_ERR(peer_pdd);

		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
							    peer_pdd->drm_priv);
		if (ret) {
			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
			return ret;
		}
	}

	pr_debug("map memory was successful for the BO\n");
	/* create the dmabuf object and export the bo */
	if (bo_bucket->alloc_flags
	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
					    &bo_bucket->dmabuf_fd, file);
		if (ret)
			return ret;
	} else {
		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
	}

	return 0;
}

static int criu_restore_bos(struct kfd_process *p,
			    struct kfd_ioctl_criu_args *args,
			    uint64_t *priv_offset,
			    uint64_t max_priv_data_size)
{
	struct kfd_criu_bo_bucket *bo_buckets = NULL;
	struct kfd_criu_bo_priv_data *bo_privs = NULL;
	struct file **files = NULL;
	int ret = 0;
	uint32_t i = 0;

	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
		return -EINVAL;

	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);

	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
	if (!bo_buckets)
		return -ENOMEM;

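	/* files[] collects the dmabuf struct files created while restoring the
	 * BOs so that they can be committed or released in one place at exit
	 * (see commit_files()).
	 */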
	files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
	if (!files) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
			     args->num_bos * sizeof(*bo_buckets));
	if (ret) {
		pr_err("Failed to copy BOs information from user\n");
		ret = -EFAULT;
		goto exit;
	}

	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
	if (!bo_privs) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
			     args->num_bos * sizeof(*bo_privs));
	if (ret) {
		pr_err("Failed to copy BOs information from user\n");
		ret = -EFAULT;
		goto exit;
	}
	*priv_offset += args->num_bos * sizeof(*bo_privs);

	/* Create and map new BOs */
	for (; i < args->num_bos; i++) {
		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
		if (ret) {
			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
			goto exit;
		}
	} /* done */

	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
	ret = copy_to_user((void __user *)args->bos,
			   bo_buckets,
			   (args->num_bos * sizeof(*bo_buckets)));
	if (ret)
		ret = -EFAULT;

exit:
	commit_files(files, bo_buckets, i, ret);
	kvfree(files);
	kvfree(bo_buckets);
	kvfree(bo_privs);
	return ret;
}

static int criu_restore_objects(struct file *filep,
				struct kfd_process *p,
				struct kfd_ioctl_criu_args *args,
				uint64_t *priv_offset,
				uint64_t max_priv_data_size)
{
	int ret = 0;
	uint32_t i;

	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));

	for (i = 0; i < args->num_objects; i++) {
		uint32_t object_type;

		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
			pr_err("Invalid private data size\n");
			return -EINVAL;
		}

		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
		if (ret) {
			pr_err("Failed to copy private information from user\n");
			goto exit;
		}

		switch (object_type) {
		case KFD_CRIU_OBJECT_TYPE_QUEUE:
			ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
						     priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		case KFD_CRIU_OBJECT_TYPE_EVENT:
			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
						     priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
						   priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		default:
			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
			ret = -EINVAL;
			goto exit;
		}
	}
exit:
	return ret;
}

static int criu_restore(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	uint64_t priv_offset = 0;
	int ret = 0;

	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);

	if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
	    !args->priv_data_size || !args->num_devices)
		return -EINVAL;

	mutex_lock(&p->mutex);

	/*
	 * Set the process to evicted state to avoid running any new queues before all the memory
	 * mappings are ready.
	 */
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
	if (ret)
		goto exit_unlock;

	/* Each function will adjust priv_offset based on how many bytes they consumed */
	ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	if (priv_offset != args->priv_data_size) {
		pr_err("Invalid private data size\n");
		ret = -EINVAL;
	}

exit_unlock:
	mutex_unlock(&p->mutex);
	if (ret)
		pr_err("Failed to restore CRIU ret:%d\n", ret);
	else
		pr_debug("CRIU restore successful\n");

	return ret;
}

static int criu_unpause(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	int ret;

	mutex_lock(&p->mutex);

	if (!p->queues_paused) {
		mutex_unlock(&p->mutex);
		return -EINVAL;
	}

	ret = kfd_process_restore_queues(p);
	if (ret)
		pr_err("Failed to unpause queues ret:%d\n", ret);
	else
		p->queues_paused = false;

	mutex_unlock(&p->mutex);

	return ret;
}

static int criu_resume(struct file *filep,
		       struct kfd_process *p,
		       struct kfd_ioctl_criu_args *args)
{
	struct kfd_process *target = NULL;
	struct pid *pid = NULL;
	int ret = 0;

	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
		 args->pid);

	pid = find_get_pid(args->pid);
	if (!pid) {
		pr_err("Cannot find pid info for %i\n", args->pid);
		return -ESRCH;
	}

	pr_debug("calling kfd_lookup_process_by_pid\n");
	target = kfd_lookup_process_by_pid(pid);

	put_pid(pid);

	if (!target) {
		pr_debug("Cannot find process info for %i\n", args->pid);
		return -ESRCH;
	}

	mutex_lock(&target->mutex);
	ret = kfd_criu_resume_svm(target);
	if (ret) {
		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
		goto exit;
	}

	ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
	if (ret)
		pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);

exit:
	mutex_unlock(&target->mutex);

	kfd_unref_process(target);
	return ret;
}

static int criu_process_info(struct file *filep,
			     struct kfd_process *p,
			     struct kfd_ioctl_criu_args *args)
{
	int ret = 0;

	mutex_lock(&p->mutex);

	if (!p->n_pdds) {
		pr_err("No pdd for given process\n");
		ret = -ENODEV;
		goto err_unlock;
	}

	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
	if (ret)
		goto err_unlock;

	p->queues_paused = true;

	args->pid = task_pid_nr_ns(p->lead_thread,
				   task_active_pid_ns(p->lead_thread));

	ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
					   &args->num_objects, &args->priv_data_size);
	if (ret)
		goto err_unlock;

	dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
		args->num_devices, args->num_bos, args->num_objects,
		args->priv_data_size);

err_unlock:
	if (ret) {
		kfd_process_restore_queues(p);
		p->queues_paused = false;
	}
	mutex_unlock(&p->mutex);
	return ret;
}

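/* Top-level handler for AMDKFD_IOC_CRIU_OP: dispatches to the per-op helpers above. */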
static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_criu_args *args = data;
	int ret;

	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
	switch (args->op) {
	case KFD_CRIU_OP_PROCESS_INFO:
		ret = criu_process_info(filep, p, args);
		break;
	case KFD_CRIU_OP_CHECKPOINT:
		ret = criu_checkpoint(filep, p, args);
		break;
	case KFD_CRIU_OP_UNPAUSE:
		ret = criu_unpause(filep, p, args);
		break;
	case KFD_CRIU_OP_RESTORE:
		ret = criu_restore(filep, p, args);
		break;
	case KFD_CRIU_OP_RESUME:
		ret = criu_resume(filep, p, args);
		break;
	default:
		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
		ret = -EINVAL;
		break;
	}

	if (ret)
		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);

	return ret;
}

static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
			  bool enable_ttmp_setup)
{
	int i = 0, ret = 0;

	if (p->is_runtime_retry)
		goto retry;

	if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
		return -EBUSY;

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		if (pdd->qpd.queue_count)
			return -EEXIST;

		/*
		 * Setup TTMPs by default.
		 * Note that this call must remain here for MES ADD QUEUE to
		 * skip_process_ctx_clear unconditionally as the first call to
		 * SET_SHADER_DEBUGGER clears any stale process context data
		 * saved in MES.
		 */
		if (pdd->dev->kfd->shared_resources.enable_mes)
			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
	}

	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
	p->runtime_info.r_debug = r_debug;
	p->runtime_info.ttmp_setup = enable_ttmp_setup;

	if (p->runtime_info.ttmp_setup) {
		for (i = 0; i < p->n_pdds; i++) {
			struct kfd_process_device *pdd = p->pdds[i];

			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
				pdd->dev->kfd2kgd->enable_debug_trap(
						pdd->dev->adev,
						true,
						pdd->dev->vm_info.last_vmid_kfd);
			} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
				pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
						pdd->dev->adev,
						false,
						0);
			}
		}
	}

retry:
	if (p->debug_trap_enabled) {
		if (!p->is_runtime_retry) {
			kfd_dbg_trap_activate(p);
			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
					 p, NULL, 0, false, NULL, 0);
		}

		mutex_unlock(&p->mutex);
		ret = down_interruptible(&p->runtime_enable_sema);
		mutex_lock(&p->mutex);

		p->is_runtime_retry = !!ret;
	}

	return ret;
}

static int runtime_disable(struct kfd_process *p)
{
	int i = 0, ret = 0;
	bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;

	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
	p->runtime_info.r_debug = 0;

	if (p->debug_trap_enabled) {
		if (was_enabled)
			kfd_dbg_trap_deactivate(p, false, 0);

		if (!p->is_runtime_retry)
			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
					 p, NULL, 0, false, NULL, 0);

		mutex_unlock(&p->mutex);
		ret = down_interruptible(&p->runtime_enable_sema);
		mutex_lock(&p->mutex);

		p->is_runtime_retry = !!ret;
		if (ret)
			return ret;
	}

	if (was_enabled && p->runtime_info.ttmp_setup) {
		for (i = 0; i < p->n_pdds; i++) {
			struct kfd_process_device *pdd = p->pdds[i];

			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
		}
	}

	p->runtime_info.ttmp_setup = false;

	/* disable ttmp setup */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		int last_err = 0;

		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
			pdd->spi_dbg_override =
					pdd->dev->kfd2kgd->disable_debug_trap(
					pdd->dev->adev,
					false,
					pdd->dev->vm_info.last_vmid_kfd);

			if (!pdd->dev->kfd->shared_resources.enable_mes)
				last_err = debug_refresh_runlist(pdd->dev->dqm);
			else
				last_err = kfd_dbg_set_mes_debug_mode(pdd,
						!kfd_dbg_has_cwsr_workaround(pdd->dev));

			if (last_err)
				ret = last_err;
		}
	}

	return ret;
}

static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_runtime_enable_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)
		r = runtime_enable(p, args->r_debug,
				   !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK));
	else
		r = runtime_disable(p);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_trap_args *args = data;
	struct task_struct *thread = NULL;
	struct mm_struct *mm = NULL;
	struct pid *pid = NULL;
	struct kfd_process *target = NULL;
	struct kfd_process_device *pdd = NULL;
	int r = 0;

	if (p->context_id != KFD_CONTEXT_ID_PRIMARY) {
		pr_debug("Set debug trap ioctl can not be invoked on non-primary kfd process\n");

		return -EOPNOTSUPP;
	}

	if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Debugging does not support sched_policy %i", sched_policy);
		return -EINVAL;
	}

	pid = find_get_pid(args->pid);
	if (!pid) {
		pr_debug("Cannot find pid info for %i\n", args->pid);
		r = -ESRCH;
		goto out;
	}

	thread = get_pid_task(pid, PIDTYPE_PID);
	if (!thread) {
		r = -ESRCH;
		goto out;
	}

	mm = get_task_mm(thread);
	if (!mm) {
		r = -ESRCH;
		goto out;
	}

	if (args->op == KFD_IOC_DBG_TRAP_ENABLE) {
		bool create_process;

		rcu_read_lock();
		create_process = thread && thread != current && ptrace_parent(thread) == current;
		rcu_read_unlock();

		target = create_process ? kfd_create_process(thread) :
					  kfd_lookup_process_by_pid(pid);
	} else {
		target = kfd_lookup_process_by_pid(pid);
	}

	if (IS_ERR_OR_NULL(target)) {
		pr_debug("Cannot find process PID %i to debug\n", args->pid);
		r = target ? PTR_ERR(target) : -ESRCH;
		target = NULL;
		goto out;
	}

	if (target->context_id != KFD_CONTEXT_ID_PRIMARY) {
		pr_debug("Set debug trap ioctl not supported on non-primary kfd process\n");
		r = -EOPNOTSUPP;
		goto out;
	}

	/* Check if target is still PTRACED. */
	rcu_read_lock();
	if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
			&& ptrace_parent(target->lead_thread) != current) {
		pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
		r = -EPERM;
	}
	rcu_read_unlock();

	if (r)
		goto out;

	mutex_lock(&target->mutex);

	if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) {
		pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op);
		r = -EINVAL;
		goto unlock_out;
	}

	if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&
			(args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||
			 args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||
			 args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||
			 args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||
			 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
			 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||
			 args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {
		r = -EPERM;
		goto unlock_out;
	}

	if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
	    args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {
		int user_gpu_id = kfd_process_get_user_gpu_id(target,
				args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
					args->set_node_address_watch.gpu_id :
					args->clear_node_address_watch.gpu_id);

		pdd = kfd_process_device_data_by_id(target, user_gpu_id);
		if (user_gpu_id == -EINVAL || !pdd) {
			r = -ENODEV;
			goto unlock_out;
		}
	}

	switch (args->op) {
	case KFD_IOC_DBG_TRAP_ENABLE:
		if (target != p)
			target->debugger_process = p;

		r = kfd_dbg_trap_enable(target,
					args->enable.dbg_fd,
					(void __user *)args->enable.rinfo_ptr,
					&args->enable.rinfo_size);
		if (!r)
			target->exception_enable_mask = args->enable.exception_mask;

		break;
	case KFD_IOC_DBG_TRAP_DISABLE:
		r = kfd_dbg_trap_disable(target);
		break;
	case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
		r = kfd_dbg_send_exception_to_runtime(target,
				args->send_runtime_event.gpu_id,
				args->send_runtime_event.queue_id,
				args->send_runtime_event.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
		kfd_dbg_set_enabled_debug_exception_mask(target,
				args->set_exceptions_enabled.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
		r = kfd_dbg_trap_set_wave_launch_override(target,
				args->launch_override.override_mode,
				args->launch_override.enable_mask,
				args->launch_override.support_request_mask,
				&args->launch_override.enable_mask,
				&args->launch_override.support_request_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
		r = kfd_dbg_trap_set_wave_launch_mode(target,
				args->launch_mode.launch_mode);
		break;
	case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
		r = suspend_queues(target,
				args->suspend_queues.num_queues,
				args->suspend_queues.grace_period,
				args->suspend_queues.exception_mask,
				(uint32_t *)args->suspend_queues.queue_array_ptr);

		break;
	case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
		r = resume_queues(target, args->resume_queues.num_queues,
				(uint32_t *)args->resume_queues.queue_array_ptr);
		break;
	case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
		r = kfd_dbg_trap_set_dev_address_watch(pdd,
				args->set_node_address_watch.address,
				args->set_node_address_watch.mask,
				&args->set_node_address_watch.id,
				args->set_node_address_watch.mode);
		break;
	case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
		r = kfd_dbg_trap_clear_dev_address_watch(pdd,
				args->clear_node_address_watch.id);
		break;
	case KFD_IOC_DBG_TRAP_SET_FLAGS:
		r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
		break;
	case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
		r = kfd_dbg_ev_query_debug_event(target,
				&args->query_debug_event.queue_id,
				&args->query_debug_event.gpu_id,
				args->query_debug_event.exception_mask,
				&args->query_debug_event.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
		r = kfd_dbg_trap_query_exception_info(target,
				args->query_exception_info.source_id,
				args->query_exception_info.exception_code,
				args->query_exception_info.clear_exception,
				(void __user *)args->query_exception_info.info_ptr,
				&args->query_exception_info.info_size);
		break;
	case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
		r = pqm_get_queue_snapshot(&target->pqm,
				args->queue_snapshot.exception_mask,
				(void __user *)args->queue_snapshot.snapshot_buf_ptr,
				&args->queue_snapshot.num_queues,
				&args->queue_snapshot.entry_size);
		break;
	case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
		r = kfd_dbg_trap_device_snapshot(target,
				args->device_snapshot.exception_mask,
				(void __user *)args->device_snapshot.snapshot_buf_ptr,
				&args->device_snapshot.num_devices,
				&args->device_snapshot.entry_size);
		break;
	default:
		pr_err("Invalid option: %i\n", args->op);
		r = -EINVAL;
	}

unlock_out:
	mutex_unlock(&target->mutex);

out:
	if (thread)
		put_task_struct(thread);

	if (mm)
		mmput(mm);

	if (pid)
		put_pid(pid);

	if (target)
		kfd_unref_process(target);

	return r;
}

/* userspace programs need to invoke this ioctl explicitly on a FD to
 * create a secondary kfd_process which replaces its primary kfd_process
 */
static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_process *process;
	int ret;

	/* Each FD owns only one kfd_process */
	if (p->context_id != KFD_CONTEXT_ID_PRIMARY)
		return -EINVAL;

	if (!filep->private_data || !p)
		return -EINVAL;

	mutex_lock(&kfd_processes_mutex);
	if (p != filep->private_data) {
		mutex_unlock(&kfd_processes_mutex);
		return -EINVAL;
	}

	process = create_process(current, false);
	if (IS_ERR(process)) {
		mutex_unlock(&kfd_processes_mutex);
		return PTR_ERR(process);
	}

	filep->private_data = process;
	mutex_unlock(&kfd_processes_mutex);

	ret = kfd_create_process_sysfs(process);
	if (ret)
		pr_warn("Failed to create sysfs entry for the kfd_process");

	/* Each open() increases kref of the primary kfd_process,
	 * so we need to reduce it here when we create a new secondary process replacing it
	 */
	kfd_unref_process(p);

	return 0;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			 kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			 kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			 kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			 kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			 kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			 kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			 kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			 kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			 kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			 kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			 kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			 kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
			 kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
			 kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
			 kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
			 kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			 kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			 kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			 kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			 kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			 kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			 kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			 kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			 kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			 kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			 kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			 kfd_ioctl_get_queue_wave_state, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
			 kfd_ioctl_get_dmabuf_info, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
			 kfd_ioctl_import_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
			 kfd_ioctl_alloc_queue_gws, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			 kfd_ioctl_smi_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			 kfd_ioctl_set_xnack_mode, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			 kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
			 kfd_ioctl_get_available_memory, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
			 kfd_ioctl_export_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
			 kfd_ioctl_runtime_enable, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
			 kfd_ioctl_set_debug_trap, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS,
			 kfd_ioctl_create_process, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;
	bool ptrace_attached = false;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
		retcode = -ENOTTY;
		goto err_i1;
	}

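	/* Look up the handler and size the payload by the larger of the
	 * kernel's and the caller's ioctl argument size, so the copy below
	 * tolerates older or newer userspace structs.
	 */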
	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else {
		retcode = -ENOTTY;
		goto err_i1;
	}

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);

	/* Get the process struct from the filep. Only the process
	 * that opened /dev/kfd can use the file descriptor. Child
	 * processes need to create their own KFD device context.
	 */
	process = filep->private_data;

	rcu_read_lock();
	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    ptrace_parent(process->lead_thread) == current)
		ptrace_attached = true;
	rcu_read_unlock();

	if (process->lead_thread != current->group_leader
	    && !ptrace_attached) {
		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
		retcode = -EBADF;
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	/*
	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
	 * CAP_CHECKPOINT_RESTORE, so we also allow access with CAP_SYS_ADMIN,
	 * which is a more privileged capability.
	 */
	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
		if (!capable(CAP_CHECKPOINT_RESTORE) &&
		    !capable(CAP_SYS_ADMIN)) {
			retcode = -EACCES;
			goto err_i1;
		}
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
			nr, arg, retcode);

	return retcode;
}

static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
			 struct vm_area_struct *vma)
{
	phys_addr_t address;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	if (PAGE_SIZE > 4096)
		return -EINVAL;

	address = dev->adev->rmmio_remap.bus_addr;

	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
		     VM_DONTDUMP | VM_PFNMAP);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	pr_debug("process pid %d mapping mmio page\n"
		 "     target user address == 0x%08llX\n"
		 "     physical address    == 0x%08llX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04lX\n",
		 process->lead_thread->pid, (unsigned long long) vma->vm_start,
		 address, vma->vm_flags, PAGE_SIZE);

	return io_remap_pfn_range(vma,
				  vma->vm_start,
				  address >> PAGE_SHIFT,
				  PAGE_SIZE,
				  vma->vm_page_prot);
}

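/* Dispatch mmap() on /dev/kfd by the type encoded in the mmap offset:
 * doorbells, events, reserved memory or remapped MMIO.
 */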
static int kfd_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_node *dev = NULL;
	unsigned long mmap_offset;
	unsigned int gpu_id;

	process = filep->private_data;
	if (!process)
		return -ESRCH;

	if (process->lead_thread != current->group_leader)
		return -EBADF;

	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	case KFD_MMAP_TYPE_MMIO:
		if (!dev)
			return -ENODEV;
		return kfd_mmio_mmap(dev, process, vma);
	}

	return -EFAULT;
}