Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
29285 views
// SPDX-License-Identifier: GPL-2.0 OR MIT1/*2* Copyright 2014-2022 Advanced Micro Devices, Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*/2223#include <linux/device.h>24#include <linux/err.h>25#include <linux/fs.h>26#include <linux/file.h>27#include <linux/sched.h>28#include <linux/slab.h>29#include <linux/uaccess.h>30#include <linux/compat.h>31#include <uapi/linux/kfd_ioctl.h>32#include <linux/time.h>33#include <linux/mm.h>34#include <linux/mman.h>35#include <linux/ptrace.h>36#include <linux/dma-buf.h>37#include <linux/processor.h>38#include "kfd_priv.h"39#include "kfd_device_queue_manager.h"40#include "kfd_svm.h"41#include "amdgpu_amdkfd.h"42#include "kfd_smi_events.h"43#include "amdgpu_dma_buf.h"44#include "kfd_debug.h"4546static long kfd_ioctl(struct file *, unsigned int, unsigned long);47static int kfd_open(struct inode *, struct file *);48static int kfd_release(struct inode *, struct file *);49static int kfd_mmap(struct file *, struct vm_area_struct *);5051static const char kfd_dev_name[] = "kfd";5253static const struct file_operations kfd_fops = {54.owner = THIS_MODULE,55.unlocked_ioctl = kfd_ioctl,56.compat_ioctl = compat_ptr_ioctl,57.open = kfd_open,58.release = kfd_release,59.mmap = kfd_mmap,60};6162static int kfd_char_dev_major = -1;63struct device *kfd_device;64static const struct class kfd_class = {65.name = kfd_dev_name,66};6768static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)69{70struct kfd_process_device *pdd;7172mutex_lock(&p->mutex);73pdd = kfd_process_device_data_by_id(p, gpu_id);7475if (pdd)76return pdd;7778mutex_unlock(&p->mutex);79return NULL;80}8182static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)83{84mutex_unlock(&pdd->process->mutex);85}8687int kfd_chardev_init(void)88{89int err = 0;9091kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);92err = kfd_char_dev_major;93if (err < 0)94goto err_register_chrdev;9596err = class_register(&kfd_class);97if (err)98goto err_class_create;99100kfd_device = device_create(&kfd_class, NULL,101MKDEV(kfd_char_dev_major, 0),102NULL, kfd_dev_name);103err = PTR_ERR(kfd_device);104if (IS_ERR(kfd_device))105goto err_device_create;106107return 0;108109err_device_create:110class_unregister(&kfd_class);111err_class_create:112unregister_chrdev(kfd_char_dev_major, kfd_dev_name);113err_register_chrdev:114return err;115}116117void kfd_chardev_exit(void)118{119device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 
0));120class_unregister(&kfd_class);121unregister_chrdev(kfd_char_dev_major, kfd_dev_name);122kfd_device = NULL;123}124125126static int kfd_open(struct inode *inode, struct file *filep)127{128struct kfd_process *process;129bool is_32bit_user_mode;130131if (iminor(inode) != 0)132return -ENODEV;133134is_32bit_user_mode = in_compat_syscall();135136if (is_32bit_user_mode) {137dev_warn(kfd_device,138"Process %d (32-bit) failed to open /dev/kfd\n"139"32-bit processes are not supported by amdkfd\n",140current->pid);141return -EPERM;142}143144process = kfd_create_process(current);145if (IS_ERR(process))146return PTR_ERR(process);147148if (kfd_process_init_cwsr_apu(process, filep)) {149kfd_unref_process(process);150return -EFAULT;151}152153/* filep now owns the reference returned by kfd_create_process */154filep->private_data = process;155156dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",157process->lead_thread->pid, process->is_32bit_user_mode);158159return 0;160}161162static int kfd_release(struct inode *inode, struct file *filep)163{164struct kfd_process *process = filep->private_data;165166if (process)167kfd_unref_process(process);168169return 0;170}171172static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,173void *data)174{175struct kfd_ioctl_get_version_args *args = data;176177args->major_version = KFD_IOCTL_MAJOR_VERSION;178args->minor_version = KFD_IOCTL_MINOR_VERSION;179180return 0;181}182183static int set_queue_properties_from_user(struct queue_properties *q_properties,184struct kfd_ioctl_create_queue_args *args)185{186/*187* Repurpose queue percentage to accommodate new features:188* bit 0-7: queue percentage189* bit 8-15: pm4_target_xcc190*/191if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {192pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");193return -EINVAL;194}195196if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {197pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");198return -EINVAL;199}200201if ((args->ring_base_address) &&202(!access_ok((const void __user *) args->ring_base_address,203sizeof(uint64_t)))) {204pr_err("Can't access ring base address\n");205return -EFAULT;206}207208if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {209pr_err("Ring size must be a power of 2 or 0\n");210return -EINVAL;211}212213if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {214args->ring_size = KFD_MIN_QUEUE_RING_SIZE;215pr_debug("Size lower. 
clamped to KFD_MIN_QUEUE_RING_SIZE");216}217218if (!access_ok((const void __user *) args->read_pointer_address,219sizeof(uint32_t))) {220pr_err("Can't access read pointer\n");221return -EFAULT;222}223224if (!access_ok((const void __user *) args->write_pointer_address,225sizeof(uint32_t))) {226pr_err("Can't access write pointer\n");227return -EFAULT;228}229230if (args->eop_buffer_address &&231!access_ok((const void __user *) args->eop_buffer_address,232sizeof(uint32_t))) {233pr_debug("Can't access eop buffer");234return -EFAULT;235}236237if (args->ctx_save_restore_address &&238!access_ok((const void __user *) args->ctx_save_restore_address,239sizeof(uint32_t))) {240pr_debug("Can't access ctx save restore buffer");241return -EFAULT;242}243244q_properties->is_interop = false;245q_properties->is_gws = false;246q_properties->queue_percent = args->queue_percentage & 0xFF;247/* bit 8-15 are repurposed to be PM4 target XCC */248q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;249q_properties->priority = args->queue_priority;250q_properties->queue_address = args->ring_base_address;251q_properties->queue_size = args->ring_size;252q_properties->read_ptr = (void __user *)args->read_pointer_address;253q_properties->write_ptr = (void __user *)args->write_pointer_address;254q_properties->eop_ring_buffer_address = args->eop_buffer_address;255q_properties->eop_ring_buffer_size = args->eop_buffer_size;256q_properties->ctx_save_restore_area_address =257args->ctx_save_restore_address;258q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;259q_properties->ctl_stack_size = args->ctl_stack_size;260q_properties->sdma_engine_id = args->sdma_engine_id;261if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||262args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)263q_properties->type = KFD_QUEUE_TYPE_COMPUTE;264else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)265q_properties->type = KFD_QUEUE_TYPE_SDMA;266else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)267q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;268else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)269q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;270else271return -ENOTSUPP;272273if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)274q_properties->format = KFD_QUEUE_FORMAT_AQL;275else276q_properties->format = KFD_QUEUE_FORMAT_PM4;277278pr_debug("Queue Percentage: %d, %d\n",279q_properties->queue_percent, args->queue_percentage);280281pr_debug("Queue Priority: %d, %d\n",282q_properties->priority, args->queue_priority);283284pr_debug("Queue Address: 0x%llX, 0x%llX\n",285q_properties->queue_address, args->ring_base_address);286287pr_debug("Queue Size: 0x%llX, %u\n",288q_properties->queue_size, args->ring_size);289290pr_debug("Queue r/w Pointers: %px, %px\n",291q_properties->read_ptr,292q_properties->write_ptr);293294pr_debug("Queue Format: %d\n", q_properties->format);295296pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);297298pr_debug("Queue CTX save area: 0x%llX\n",299q_properties->ctx_save_restore_area_address);300301return 0;302}303304static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,305void *data)306{307struct kfd_ioctl_create_queue_args *args = data;308struct kfd_node *dev;309int err = 0;310unsigned int queue_id;311struct kfd_process_device *pdd;312struct queue_properties q_properties;313uint32_t doorbell_offset_in_process = 0;314315memset(&q_properties, 0, sizeof(struct queue_properties));316317pr_debug("Creating queue ioctl\n");318319err = 
set_queue_properties_from_user(&q_properties, args);320if (err)321return err;322323pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);324325mutex_lock(&p->mutex);326327pdd = kfd_process_device_data_by_id(p, args->gpu_id);328if (!pdd) {329pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);330err = -EINVAL;331goto err_pdd;332}333dev = pdd->dev;334335pdd = kfd_bind_process_to_device(dev, p);336if (IS_ERR(pdd)) {337err = -ESRCH;338goto err_bind_process;339}340341if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {342int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +343kfd_get_num_xgmi_sdma_engines(dev) - 1;344345if (q_properties.sdma_engine_id > max_sdma_eng_id) {346err = -EINVAL;347pr_err("sdma_engine_id %i exceeds maximum id of %i\n",348q_properties.sdma_engine_id, max_sdma_eng_id);349goto err_sdma_engine_id;350}351}352353if (!pdd->qpd.proc_doorbells) {354err = kfd_alloc_process_doorbells(dev->kfd, pdd);355if (err) {356pr_debug("failed to allocate process doorbells\n");357goto err_bind_process;358}359}360361err = kfd_queue_acquire_buffers(pdd, &q_properties);362if (err) {363pr_debug("failed to acquire user queue buffers\n");364goto err_acquire_queue_buf;365}366367pr_debug("Creating queue for process pid %d on gpu 0x%x\n",368p->lead_thread->pid,369dev->id);370371err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,372NULL, NULL, NULL, &doorbell_offset_in_process);373if (err != 0)374goto err_create_queue;375376args->queue_id = queue_id;377378379/* Return gpu_id as doorbell offset for mmap usage */380args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;381args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);382if (KFD_IS_SOC15(dev))383/* On SOC15 ASICs, include the doorbell offset within the384* process doorbell frame, which is 2 pages.385*/386args->doorbell_offset |= doorbell_offset_in_process;387388mutex_unlock(&p->mutex);389390pr_debug("Queue id %d was created successfully\n", args->queue_id);391392pr_debug("Ring buffer address == 0x%016llX\n",393args->ring_base_address);394395pr_debug("Read ptr address == 0x%016llX\n",396args->read_pointer_address);397398pr_debug("Write ptr address == 0x%016llX\n",399args->write_pointer_address);400401kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);402return 0;403404err_create_queue:405kfd_queue_unref_bo_vas(pdd, &q_properties);406kfd_queue_release_buffers(pdd, &q_properties);407err_acquire_queue_buf:408err_sdma_engine_id:409err_bind_process:410err_pdd:411mutex_unlock(&p->mutex);412return err;413}414415static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,416void *data)417{418int retval;419struct kfd_ioctl_destroy_queue_args *args = data;420421pr_debug("Destroying queue id %d for process pid %d\n",422args->queue_id,423p->lead_thread->pid);424425mutex_lock(&p->mutex);426427retval = pqm_destroy_queue(&p->pqm, args->queue_id);428429mutex_unlock(&p->mutex);430return retval;431}432433static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,434void *data)435{436int retval;437struct kfd_ioctl_update_queue_args *args = data;438struct queue_properties properties;439440/*441* Repurpose queue percentage to accommodate new features:442* bit 0-7: queue percentage443* bit 8-15: pm4_target_xcc444*/445if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {446pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");447return -EINVAL;448}449450if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {451pr_err("Queue priority must be between 0 to 
KFD_MAX_QUEUE_PRIORITY\n");452return -EINVAL;453}454455if ((args->ring_base_address) &&456(!access_ok((const void __user *) args->ring_base_address,457sizeof(uint64_t)))) {458pr_err("Can't access ring base address\n");459return -EFAULT;460}461462if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {463pr_err("Ring size must be a power of 2 or 0\n");464return -EINVAL;465}466467if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {468args->ring_size = KFD_MIN_QUEUE_RING_SIZE;469pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");470}471472properties.queue_address = args->ring_base_address;473properties.queue_size = args->ring_size;474properties.queue_percent = args->queue_percentage & 0xFF;475/* bit 8-15 are repurposed to be PM4 target XCC */476properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;477properties.priority = args->queue_priority;478479pr_debug("Updating queue id %d for process pid %d\n",480args->queue_id, p->lead_thread->pid);481482mutex_lock(&p->mutex);483484retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);485486mutex_unlock(&p->mutex);487488return retval;489}490491static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,492void *data)493{494int retval;495const int max_num_cus = 1024;496struct kfd_ioctl_set_cu_mask_args *args = data;497struct mqd_update_info minfo = {0};498uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;499size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);500501if ((args->num_cu_mask % 32) != 0) {502pr_debug("num_cu_mask 0x%x must be a multiple of 32",503args->num_cu_mask);504return -EINVAL;505}506507minfo.cu_mask.count = args->num_cu_mask;508if (minfo.cu_mask.count == 0) {509pr_debug("CU mask cannot be 0");510return -EINVAL;511}512513/* To prevent an unreasonably large CU mask size, set an arbitrary514* limit of max_num_cus bits. 
We can then just drop any CU mask bits515* past max_num_cus bits and just use the first max_num_cus bits.516*/517if (minfo.cu_mask.count > max_num_cus) {518pr_debug("CU mask cannot be greater than 1024 bits");519minfo.cu_mask.count = max_num_cus;520cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);521}522523minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);524if (IS_ERR(minfo.cu_mask.ptr)) {525pr_debug("Could not copy CU mask from userspace");526return PTR_ERR(minfo.cu_mask.ptr);527}528529mutex_lock(&p->mutex);530531retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);532533mutex_unlock(&p->mutex);534535kfree(minfo.cu_mask.ptr);536return retval;537}538539static int kfd_ioctl_get_queue_wave_state(struct file *filep,540struct kfd_process *p, void *data)541{542struct kfd_ioctl_get_queue_wave_state_args *args = data;543int r;544545mutex_lock(&p->mutex);546547r = pqm_get_wave_state(&p->pqm, args->queue_id,548(void __user *)args->ctl_stack_address,549&args->ctl_stack_used_size,550&args->save_area_used_size);551552mutex_unlock(&p->mutex);553554return r;555}556557static int kfd_ioctl_set_memory_policy(struct file *filep,558struct kfd_process *p, void *data)559{560struct kfd_ioctl_set_memory_policy_args *args = data;561int err = 0;562struct kfd_process_device *pdd;563enum cache_policy default_policy, alternate_policy;564565if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT566&& args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {567return -EINVAL;568}569570if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT571&& args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {572return -EINVAL;573}574575mutex_lock(&p->mutex);576pdd = kfd_process_device_data_by_id(p, args->gpu_id);577if (!pdd) {578pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);579err = -EINVAL;580goto err_pdd;581}582583pdd = kfd_bind_process_to_device(pdd->dev, p);584if (IS_ERR(pdd)) {585err = -ESRCH;586goto out;587}588589default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)590? cache_policy_coherent : cache_policy_noncoherent;591592alternate_policy =593(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)594? 
cache_policy_coherent : cache_policy_noncoherent;595596if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,597&pdd->qpd,598default_policy,599alternate_policy,600(void __user *)args->alternate_aperture_base,601args->alternate_aperture_size,602args->misc_process_flag))603err = -EINVAL;604605out:606err_pdd:607mutex_unlock(&p->mutex);608609return err;610}611612static int kfd_ioctl_set_trap_handler(struct file *filep,613struct kfd_process *p, void *data)614{615struct kfd_ioctl_set_trap_handler_args *args = data;616int err = 0;617struct kfd_process_device *pdd;618619mutex_lock(&p->mutex);620621pdd = kfd_process_device_data_by_id(p, args->gpu_id);622if (!pdd) {623err = -EINVAL;624goto err_pdd;625}626627pdd = kfd_bind_process_to_device(pdd->dev, p);628if (IS_ERR(pdd)) {629err = -ESRCH;630goto out;631}632633kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);634635out:636err_pdd:637mutex_unlock(&p->mutex);638639return err;640}641642static int kfd_ioctl_dbg_register(struct file *filep,643struct kfd_process *p, void *data)644{645return -EPERM;646}647648static int kfd_ioctl_dbg_unregister(struct file *filep,649struct kfd_process *p, void *data)650{651return -EPERM;652}653654static int kfd_ioctl_dbg_address_watch(struct file *filep,655struct kfd_process *p, void *data)656{657return -EPERM;658}659660/* Parse and generate fixed size data structure for wave control */661static int kfd_ioctl_dbg_wave_control(struct file *filep,662struct kfd_process *p, void *data)663{664return -EPERM;665}666667static int kfd_ioctl_get_clock_counters(struct file *filep,668struct kfd_process *p, void *data)669{670struct kfd_ioctl_get_clock_counters_args *args = data;671struct kfd_process_device *pdd;672673mutex_lock(&p->mutex);674pdd = kfd_process_device_data_by_id(p, args->gpu_id);675mutex_unlock(&p->mutex);676if (pdd)677/* Reading GPU clock counter from KGD */678args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);679else680/* Node without GPU resource */681args->gpu_clock_counter = 0;682683/* No access to rdtsc. 
Using raw monotonic time */684args->cpu_clock_counter = ktime_get_raw_ns();685args->system_clock_counter = ktime_get_boottime_ns();686687/* Since the counter is in nano-seconds we use 1GHz frequency */688args->system_clock_freq = 1000000000;689690return 0;691}692693694static int kfd_ioctl_get_process_apertures(struct file *filp,695struct kfd_process *p, void *data)696{697struct kfd_ioctl_get_process_apertures_args *args = data;698struct kfd_process_device_apertures *pAperture;699int i;700701dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);702703args->num_of_nodes = 0;704705mutex_lock(&p->mutex);706/* Run over all pdd of the process */707for (i = 0; i < p->n_pdds; i++) {708struct kfd_process_device *pdd = p->pdds[i];709710pAperture =711&args->process_apertures[args->num_of_nodes];712pAperture->gpu_id = pdd->dev->id;713pAperture->lds_base = pdd->lds_base;714pAperture->lds_limit = pdd->lds_limit;715pAperture->gpuvm_base = pdd->gpuvm_base;716pAperture->gpuvm_limit = pdd->gpuvm_limit;717pAperture->scratch_base = pdd->scratch_base;718pAperture->scratch_limit = pdd->scratch_limit;719720dev_dbg(kfd_device,721"node id %u\n", args->num_of_nodes);722dev_dbg(kfd_device,723"gpu id %u\n", pdd->dev->id);724dev_dbg(kfd_device,725"lds_base %llX\n", pdd->lds_base);726dev_dbg(kfd_device,727"lds_limit %llX\n", pdd->lds_limit);728dev_dbg(kfd_device,729"gpuvm_base %llX\n", pdd->gpuvm_base);730dev_dbg(kfd_device,731"gpuvm_limit %llX\n", pdd->gpuvm_limit);732dev_dbg(kfd_device,733"scratch_base %llX\n", pdd->scratch_base);734dev_dbg(kfd_device,735"scratch_limit %llX\n", pdd->scratch_limit);736737if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)738break;739}740mutex_unlock(&p->mutex);741742return 0;743}744745static int kfd_ioctl_get_process_apertures_new(struct file *filp,746struct kfd_process *p, void *data)747{748struct kfd_ioctl_get_process_apertures_new_args *args = data;749struct kfd_process_device_apertures *pa;750int ret;751int i;752753dev_dbg(kfd_device, "get apertures for process pid %d",754p->lead_thread->pid);755756if (args->num_of_nodes == 0) {757/* Return number of nodes, so that user space can alloacate758* sufficient memory759*/760mutex_lock(&p->mutex);761args->num_of_nodes = p->n_pdds;762goto out_unlock;763}764765/* Fill in process-aperture information for all available766* nodes, but not more than args->num_of_nodes as that is767* the amount of memory allocated by user768*/769pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),770GFP_KERNEL);771if (!pa)772return -ENOMEM;773774mutex_lock(&p->mutex);775776if (!p->n_pdds) {777args->num_of_nodes = 0;778kfree(pa);779goto out_unlock;780}781782/* Run over all pdd of the process */783for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {784struct kfd_process_device *pdd = p->pdds[i];785786pa[i].gpu_id = pdd->dev->id;787pa[i].lds_base = pdd->lds_base;788pa[i].lds_limit = pdd->lds_limit;789pa[i].gpuvm_base = pdd->gpuvm_base;790pa[i].gpuvm_limit = pdd->gpuvm_limit;791pa[i].scratch_base = pdd->scratch_base;792pa[i].scratch_limit = pdd->scratch_limit;793794dev_dbg(kfd_device,795"gpu id %u\n", pdd->dev->id);796dev_dbg(kfd_device,797"lds_base %llX\n", pdd->lds_base);798dev_dbg(kfd_device,799"lds_limit %llX\n", pdd->lds_limit);800dev_dbg(kfd_device,801"gpuvm_base %llX\n", pdd->gpuvm_base);802dev_dbg(kfd_device,803"gpuvm_limit %llX\n", pdd->gpuvm_limit);804dev_dbg(kfd_device,805"scratch_base %llX\n", pdd->scratch_base);806dev_dbg(kfd_device,807"scratch_limit %llX\n", 
pdd->scratch_limit);808}809mutex_unlock(&p->mutex);810811args->num_of_nodes = i;812ret = copy_to_user(813(void __user *)args->kfd_process_device_apertures_ptr,814pa,815(i * sizeof(struct kfd_process_device_apertures)));816kfree(pa);817return ret ? -EFAULT : 0;818819out_unlock:820mutex_unlock(&p->mutex);821return 0;822}823824static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,825void *data)826{827struct kfd_ioctl_create_event_args *args = data;828int err;829830/* For dGPUs the event page is allocated in user mode. The831* handle is passed to KFD with the first call to this IOCTL832* through the event_page_offset field.833*/834if (args->event_page_offset) {835mutex_lock(&p->mutex);836err = kfd_kmap_event_page(p, args->event_page_offset);837mutex_unlock(&p->mutex);838if (err)839return err;840}841842err = kfd_event_create(filp, p, args->event_type,843args->auto_reset != 0, args->node_id,844&args->event_id, &args->event_trigger_data,845&args->event_page_offset,846&args->event_slot_index);847848pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);849return err;850}851852static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,853void *data)854{855struct kfd_ioctl_destroy_event_args *args = data;856857return kfd_event_destroy(p, args->event_id);858}859860static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,861void *data)862{863struct kfd_ioctl_set_event_args *args = data;864865return kfd_set_event(p, args->event_id);866}867868static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,869void *data)870{871struct kfd_ioctl_reset_event_args *args = data;872873return kfd_reset_event(p, args->event_id);874}875876static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,877void *data)878{879struct kfd_ioctl_wait_events_args *args = data;880881return kfd_wait_on_events(p, args->num_events,882(void __user *)args->events_ptr,883(args->wait_for_all != 0),884&args->timeout, &args->wait_result);885}886static int kfd_ioctl_set_scratch_backing_va(struct file *filep,887struct kfd_process *p, void *data)888{889struct kfd_ioctl_set_scratch_backing_va_args *args = data;890struct kfd_process_device *pdd;891struct kfd_node *dev;892long err;893894mutex_lock(&p->mutex);895pdd = kfd_process_device_data_by_id(p, args->gpu_id);896if (!pdd) {897err = -EINVAL;898goto err_pdd;899}900dev = pdd->dev;901902pdd = kfd_bind_process_to_device(dev, p);903if (IS_ERR(pdd)) {904err = PTR_ERR(pdd);905goto bind_process_to_device_fail;906}907908pdd->qpd.sh_hidden_private_base = args->va_addr;909910mutex_unlock(&p->mutex);911912if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&913pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)914dev->kfd2kgd->set_scratch_backing_va(915dev->adev, args->va_addr, pdd->qpd.vmid);916917return 0;918919bind_process_to_device_fail:920err_pdd:921mutex_unlock(&p->mutex);922return err;923}924925static int kfd_ioctl_get_tile_config(struct file *filep,926struct kfd_process *p, void *data)927{928struct kfd_ioctl_get_tile_config_args *args = data;929struct kfd_process_device *pdd;930struct tile_config config;931int err = 0;932933mutex_lock(&p->mutex);934pdd = kfd_process_device_data_by_id(p, args->gpu_id);935mutex_unlock(&p->mutex);936if (!pdd)937return -EINVAL;938939amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);940941args->gb_addr_config = config.gb_addr_config;942args->num_banks = config.num_banks;943args->num_ranks = config.num_ranks;944945if (args->num_tile_configs > 
config.num_tile_configs)946args->num_tile_configs = config.num_tile_configs;947err = copy_to_user((void __user *)args->tile_config_ptr,948config.tile_config_ptr,949args->num_tile_configs * sizeof(uint32_t));950if (err) {951args->num_tile_configs = 0;952return -EFAULT;953}954955if (args->num_macro_tile_configs > config.num_macro_tile_configs)956args->num_macro_tile_configs =957config.num_macro_tile_configs;958err = copy_to_user((void __user *)args->macro_tile_config_ptr,959config.macro_tile_config_ptr,960args->num_macro_tile_configs * sizeof(uint32_t));961if (err) {962args->num_macro_tile_configs = 0;963return -EFAULT;964}965966return 0;967}968969static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,970void *data)971{972struct kfd_ioctl_acquire_vm_args *args = data;973struct kfd_process_device *pdd;974struct file *drm_file;975int ret;976977drm_file = fget(args->drm_fd);978if (!drm_file)979return -EINVAL;980981mutex_lock(&p->mutex);982pdd = kfd_process_device_data_by_id(p, args->gpu_id);983if (!pdd) {984ret = -EINVAL;985goto err_pdd;986}987988if (pdd->drm_file) {989ret = pdd->drm_file == drm_file ? 0 : -EBUSY;990goto err_drm_file;991}992993ret = kfd_process_device_init_vm(pdd, drm_file);994if (ret)995goto err_unlock;996997/* On success, the PDD keeps the drm_file reference */998mutex_unlock(&p->mutex);9991000return 0;10011002err_unlock:1003err_pdd:1004err_drm_file:1005mutex_unlock(&p->mutex);1006fput(drm_file);1007return ret;1008}10091010bool kfd_dev_is_large_bar(struct kfd_node *dev)1011{1012if (dev->kfd->adev->debug_largebar) {1013pr_debug("Simulate large-bar allocation on non large-bar machine\n");1014return true;1015}10161017if (dev->local_mem_info.local_mem_size_private == 0 &&1018dev->local_mem_info.local_mem_size_public > 0)1019return true;10201021if (dev->local_mem_info.local_mem_size_public == 0 &&1022dev->kfd->adev->gmc.is_app_apu) {1023pr_debug("APP APU, Consider like a large bar system\n");1024return true;1025}10261027return false;1028}10291030static int kfd_ioctl_get_available_memory(struct file *filep,1031struct kfd_process *p, void *data)1032{1033struct kfd_ioctl_get_available_memory_args *args = data;1034struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);10351036if (!pdd)1037return -EINVAL;1038args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,1039pdd->dev->node_id);1040kfd_unlock_pdd(pdd);1041return 0;1042}10431044static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,1045struct kfd_process *p, void *data)1046{1047struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;1048struct kfd_process_device *pdd;1049void *mem;1050struct kfd_node *dev;1051int idr_handle;1052long err;1053uint64_t offset = args->mmap_offset;1054uint32_t flags = args->flags;10551056if (args->size == 0)1057return -EINVAL;10581059#if IS_ENABLED(CONFIG_HSA_AMD_SVM)1060/* Flush pending deferred work to avoid racing with deferred actions1061* from previous memory map changes (e.g. 
munmap).1062*/1063svm_range_list_lock_and_flush_work(&p->svms, current->mm);1064mutex_lock(&p->svms.lock);1065mmap_write_unlock(current->mm);10661067/* Skip a special case that allocates VRAM without VA,1068* VA will be invalid of 0.1069*/1070if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&1071interval_tree_iter_first(&p->svms.objects,1072args->va_addr >> PAGE_SHIFT,1073(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {1074pr_err("Address: 0x%llx already allocated by SVM\n",1075args->va_addr);1076mutex_unlock(&p->svms.lock);1077return -EADDRINUSE;1078}10791080/* When register user buffer check if it has been registered by svm by1081* buffer cpu virtual address.1082*/1083if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&1084interval_tree_iter_first(&p->svms.objects,1085args->mmap_offset >> PAGE_SHIFT,1086(args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {1087pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",1088args->mmap_offset);1089mutex_unlock(&p->svms.lock);1090return -EADDRINUSE;1091}10921093mutex_unlock(&p->svms.lock);1094#endif1095mutex_lock(&p->mutex);1096pdd = kfd_process_device_data_by_id(p, args->gpu_id);1097if (!pdd) {1098err = -EINVAL;1099goto err_pdd;1100}11011102dev = pdd->dev;11031104if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&1105(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&1106!kfd_dev_is_large_bar(dev)) {1107pr_err("Alloc host visible vram on small bar is not allowed\n");1108err = -EINVAL;1109goto err_large_bar;1110}11111112pdd = kfd_bind_process_to_device(dev, p);1113if (IS_ERR(pdd)) {1114err = PTR_ERR(pdd);1115goto err_unlock;1116}11171118if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {1119if (args->size != kfd_doorbell_process_slice(dev->kfd)) {1120err = -EINVAL;1121goto err_unlock;1122}1123offset = kfd_get_process_doorbells(pdd);1124if (!offset) {1125err = -ENOMEM;1126goto err_unlock;1127}1128} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {1129if (args->size != PAGE_SIZE) {1130err = -EINVAL;1131goto err_unlock;1132}1133offset = dev->adev->rmmio_remap.bus_addr;1134if (!offset || (PAGE_SIZE > 4096)) {1135err = -ENOMEM;1136goto err_unlock;1137}1138}11391140err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(1141dev->adev, args->va_addr, args->size,1142pdd->drm_priv, (struct kgd_mem **) &mem, &offset,1143flags, false);11441145if (err)1146goto err_unlock;11471148idr_handle = kfd_process_device_create_obj_handle(pdd, mem);1149if (idr_handle < 0) {1150err = -EFAULT;1151goto err_free;1152}11531154/* Update the VRAM usage count */1155if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {1156uint64_t size = args->size;11571158if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)1159size >>= 1;1160atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);1161}11621163mutex_unlock(&p->mutex);11641165args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);1166args->mmap_offset = offset;11671168/* MMIO is mapped through kfd device1169* Generate a kfd mmap offset1170*/1171if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)1172args->mmap_offset = KFD_MMAP_TYPE_MMIO1173| KFD_MMAP_GPU_ID(args->gpu_id);11741175return 0;11761177err_free:1178amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,1179pdd->drm_priv, NULL);1180err_unlock:1181err_pdd:1182err_large_bar:1183mutex_unlock(&p->mutex);1184return err;1185}11861187static int kfd_ioctl_free_memory_of_gpu(struct file *filep,1188struct kfd_process *p, void *data)1189{1190struct kfd_ioctl_free_memory_of_gpu_args *args = data;1191struct kfd_process_device *pdd;1192void *mem;1193int ret;1194uint64_t size = 
0;11951196mutex_lock(&p->mutex);1197/*1198* Safeguard to prevent user space from freeing signal BO.1199* It will be freed at process termination.1200*/1201if (p->signal_handle && (p->signal_handle == args->handle)) {1202pr_err("Free signal BO is not allowed\n");1203ret = -EPERM;1204goto err_unlock;1205}12061207pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1208if (!pdd) {1209pr_err("Process device data doesn't exist\n");1210ret = -EINVAL;1211goto err_pdd;1212}12131214mem = kfd_process_device_translate_handle(1215pdd, GET_IDR_HANDLE(args->handle));1216if (!mem) {1217ret = -EINVAL;1218goto err_unlock;1219}12201221ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,1222(struct kgd_mem *)mem, pdd->drm_priv, &size);12231224/* If freeing the buffer failed, leave the handle in place for1225* clean-up during process tear-down.1226*/1227if (!ret)1228kfd_process_device_remove_obj_handle(1229pdd, GET_IDR_HANDLE(args->handle));12301231atomic64_sub(size, &pdd->vram_usage);12321233err_unlock:1234err_pdd:1235mutex_unlock(&p->mutex);1236return ret;1237}12381239static int kfd_ioctl_map_memory_to_gpu(struct file *filep,1240struct kfd_process *p, void *data)1241{1242struct kfd_ioctl_map_memory_to_gpu_args *args = data;1243struct kfd_process_device *pdd, *peer_pdd;1244void *mem;1245struct kfd_node *dev;1246long err = 0;1247int i;1248uint32_t *devices_arr = NULL;12491250if (!args->n_devices) {1251pr_debug("Device IDs array empty\n");1252return -EINVAL;1253}1254if (args->n_success > args->n_devices) {1255pr_debug("n_success exceeds n_devices\n");1256return -EINVAL;1257}12581259devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1260GFP_KERNEL);1261if (!devices_arr)1262return -ENOMEM;12631264err = copy_from_user(devices_arr,1265(void __user *)args->device_ids_array_ptr,1266args->n_devices * sizeof(*devices_arr));1267if (err != 0) {1268err = -EFAULT;1269goto copy_from_user_failed;1270}12711272mutex_lock(&p->mutex);1273pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1274if (!pdd) {1275err = -EINVAL;1276goto get_process_device_data_failed;1277}1278dev = pdd->dev;12791280pdd = kfd_bind_process_to_device(dev, p);1281if (IS_ERR(pdd)) {1282err = PTR_ERR(pdd);1283goto bind_process_to_device_failed;1284}12851286mem = kfd_process_device_translate_handle(pdd,1287GET_IDR_HANDLE(args->handle));1288if (!mem) {1289err = -ENOMEM;1290goto get_mem_obj_from_handle_failed;1291}12921293for (i = args->n_success; i < args->n_devices; i++) {1294peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1295if (!peer_pdd) {1296pr_debug("Getting device by id failed for 0x%x\n",1297devices_arr[i]);1298err = -EINVAL;1299goto get_mem_obj_from_handle_failed;1300}13011302peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);1303if (IS_ERR(peer_pdd)) {1304err = PTR_ERR(peer_pdd);1305goto get_mem_obj_from_handle_failed;1306}13071308err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(1309peer_pdd->dev->adev, (struct kgd_mem *)mem,1310peer_pdd->drm_priv);1311if (err) {1312struct pci_dev *pdev = peer_pdd->dev->adev->pdev;13131314dev_err(dev->adev->dev,1315"Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",1316pci_domain_nr(pdev->bus),1317pdev->bus->number,1318PCI_SLOT(pdev->devfn),1319PCI_FUNC(pdev->devfn),1320((struct kgd_mem *)mem)->domain);1321goto map_memory_to_gpu_failed;1322}1323args->n_success = i+1;1324}13251326err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);1327if (err) {1328pr_debug("Sync memory failed, wait interrupted by user signal\n");1329goto 
sync_memory_failed;1330}13311332mutex_unlock(&p->mutex);13331334/* Flush TLBs after waiting for the page table updates to complete */1335for (i = 0; i < args->n_devices; i++) {1336peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1337if (WARN_ON_ONCE(!peer_pdd))1338continue;1339kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);1340}1341kfree(devices_arr);13421343return err;13441345get_process_device_data_failed:1346bind_process_to_device_failed:1347get_mem_obj_from_handle_failed:1348map_memory_to_gpu_failed:1349sync_memory_failed:1350mutex_unlock(&p->mutex);1351copy_from_user_failed:1352kfree(devices_arr);13531354return err;1355}13561357static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,1358struct kfd_process *p, void *data)1359{1360struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;1361struct kfd_process_device *pdd, *peer_pdd;1362void *mem;1363long err = 0;1364uint32_t *devices_arr = NULL, i;1365bool flush_tlb;13661367if (!args->n_devices) {1368pr_debug("Device IDs array empty\n");1369return -EINVAL;1370}1371if (args->n_success > args->n_devices) {1372pr_debug("n_success exceeds n_devices\n");1373return -EINVAL;1374}13751376devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1377GFP_KERNEL);1378if (!devices_arr)1379return -ENOMEM;13801381err = copy_from_user(devices_arr,1382(void __user *)args->device_ids_array_ptr,1383args->n_devices * sizeof(*devices_arr));1384if (err != 0) {1385err = -EFAULT;1386goto copy_from_user_failed;1387}13881389mutex_lock(&p->mutex);1390pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1391if (!pdd) {1392err = -EINVAL;1393goto bind_process_to_device_failed;1394}13951396mem = kfd_process_device_translate_handle(pdd,1397GET_IDR_HANDLE(args->handle));1398if (!mem) {1399err = -ENOMEM;1400goto get_mem_obj_from_handle_failed;1401}14021403for (i = args->n_success; i < args->n_devices; i++) {1404peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1405if (!peer_pdd) {1406err = -EINVAL;1407goto get_mem_obj_from_handle_failed;1408}1409err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(1410peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);1411if (err) {1412pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);1413goto unmap_memory_from_gpu_failed;1414}1415args->n_success = i+1;1416}14171418flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);1419if (flush_tlb) {1420err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,1421(struct kgd_mem *) mem, true);1422if (err) {1423pr_debug("Sync memory failed, wait interrupted by user signal\n");1424goto sync_memory_failed;1425}1426}14271428/* Flush TLBs after waiting for the page table updates to complete */1429for (i = 0; i < args->n_devices; i++) {1430peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1431if (WARN_ON_ONCE(!peer_pdd))1432continue;1433if (flush_tlb)1434kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);14351436/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */1437err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);1438if (err)1439goto sync_memory_failed;1440}14411442mutex_unlock(&p->mutex);14431444kfree(devices_arr);14451446return 0;14471448bind_process_to_device_failed:1449get_mem_obj_from_handle_failed:1450unmap_memory_from_gpu_failed:1451sync_memory_failed:1452mutex_unlock(&p->mutex);1453copy_from_user_failed:1454kfree(devices_arr);1455return err;1456}14571458static int kfd_ioctl_alloc_queue_gws(struct file *filep,1459struct kfd_process *p, void *data)1460{1461int retval;1462struct 
kfd_ioctl_alloc_queue_gws_args *args = data;1463struct queue *q;1464struct kfd_node *dev;14651466mutex_lock(&p->mutex);1467q = pqm_get_user_queue(&p->pqm, args->queue_id);14681469if (q) {1470dev = q->device;1471} else {1472retval = -EINVAL;1473goto out_unlock;1474}14751476if (!dev->gws) {1477retval = -ENODEV;1478goto out_unlock;1479}14801481if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {1482retval = -ENODEV;1483goto out_unlock;1484}14851486if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||1487kfd_dbg_has_cwsr_workaround(dev))) {1488retval = -EBUSY;1489goto out_unlock;1490}14911492retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);1493mutex_unlock(&p->mutex);14941495args->first_gws = 0;1496return retval;14971498out_unlock:1499mutex_unlock(&p->mutex);1500return retval;1501}15021503static int kfd_ioctl_get_dmabuf_info(struct file *filep,1504struct kfd_process *p, void *data)1505{1506struct kfd_ioctl_get_dmabuf_info_args *args = data;1507struct kfd_node *dev = NULL;1508struct amdgpu_device *dmabuf_adev;1509void *metadata_buffer = NULL;1510uint32_t flags;1511int8_t xcp_id;1512unsigned int i;1513int r;15141515/* Find a KFD GPU device that supports the get_dmabuf_info query */1516for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)1517if (dev && !kfd_devcgroup_check_permission(dev))1518break;1519if (!dev)1520return -EINVAL;15211522if (args->metadata_ptr) {1523metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);1524if (!metadata_buffer)1525return -ENOMEM;1526}15271528/* Get dmabuf info from KGD */1529r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,1530&dmabuf_adev, &args->size,1531metadata_buffer, args->metadata_size,1532&args->metadata_size, &flags, &xcp_id);1533if (r)1534goto exit;15351536if (xcp_id >= 0)1537args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;1538else1539args->gpu_id = dev->id;1540args->flags = flags;15411542/* Copy metadata buffer to user mode */1543if (metadata_buffer) {1544r = copy_to_user((void __user *)args->metadata_ptr,1545metadata_buffer, args->metadata_size);1546if (r != 0)1547r = -EFAULT;1548}15491550exit:1551kfree(metadata_buffer);15521553return r;1554}15551556static int kfd_ioctl_import_dmabuf(struct file *filep,1557struct kfd_process *p, void *data)1558{1559struct kfd_ioctl_import_dmabuf_args *args = data;1560struct kfd_process_device *pdd;1561int idr_handle;1562uint64_t size;1563void *mem;1564int r;15651566mutex_lock(&p->mutex);1567pdd = kfd_process_device_data_by_id(p, args->gpu_id);1568if (!pdd) {1569r = -EINVAL;1570goto err_unlock;1571}15721573pdd = kfd_bind_process_to_device(pdd->dev, p);1574if (IS_ERR(pdd)) {1575r = PTR_ERR(pdd);1576goto err_unlock;1577}15781579r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,1580args->va_addr, pdd->drm_priv,1581(struct kgd_mem **)&mem, &size,1582NULL);1583if (r)1584goto err_unlock;15851586idr_handle = kfd_process_device_create_obj_handle(pdd, mem);1587if (idr_handle < 0) {1588r = -EFAULT;1589goto err_free;1590}15911592mutex_unlock(&p->mutex);15931594args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);15951596return 0;15971598err_free:1599amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,1600pdd->drm_priv, NULL);1601err_unlock:1602mutex_unlock(&p->mutex);1603return r;1604}16051606static int kfd_ioctl_export_dmabuf(struct file *filep,1607struct kfd_process *p, void *data)1608{1609struct kfd_ioctl_export_dmabuf_args *args = data;1610struct kfd_process_device *pdd;1611struct dma_buf 
*dmabuf;1612struct kfd_node *dev;1613void *mem;1614int ret = 0;16151616dev = kfd_device_by_id(GET_GPU_ID(args->handle));1617if (!dev)1618return -EINVAL;16191620mutex_lock(&p->mutex);16211622pdd = kfd_get_process_device_data(dev, p);1623if (!pdd) {1624ret = -EINVAL;1625goto err_unlock;1626}16271628mem = kfd_process_device_translate_handle(pdd,1629GET_IDR_HANDLE(args->handle));1630if (!mem) {1631ret = -EINVAL;1632goto err_unlock;1633}16341635ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1636mutex_unlock(&p->mutex);1637if (ret)1638goto err_out;16391640ret = dma_buf_fd(dmabuf, args->flags);1641if (ret < 0) {1642dma_buf_put(dmabuf);1643goto err_out;1644}1645/* dma_buf_fd assigns the reference count to the fd, no need to1646* put the reference here.1647*/1648args->dmabuf_fd = ret;16491650return 0;16511652err_unlock:1653mutex_unlock(&p->mutex);1654err_out:1655return ret;1656}16571658/* Handle requests for watching SMI events */1659static int kfd_ioctl_smi_events(struct file *filep,1660struct kfd_process *p, void *data)1661{1662struct kfd_ioctl_smi_events_args *args = data;1663struct kfd_process_device *pdd;16641665mutex_lock(&p->mutex);16661667pdd = kfd_process_device_data_by_id(p, args->gpuid);1668mutex_unlock(&p->mutex);1669if (!pdd)1670return -EINVAL;16711672return kfd_smi_event_open(pdd->dev, &args->anon_fd);1673}16741675#if IS_ENABLED(CONFIG_HSA_AMD_SVM)16761677static int kfd_ioctl_set_xnack_mode(struct file *filep,1678struct kfd_process *p, void *data)1679{1680struct kfd_ioctl_set_xnack_mode_args *args = data;1681int r = 0;16821683mutex_lock(&p->mutex);1684if (args->xnack_enabled >= 0) {1685if (!list_empty(&p->pqm.queues)) {1686pr_debug("Process has user queues running\n");1687r = -EBUSY;1688goto out_unlock;1689}16901691if (p->xnack_enabled == args->xnack_enabled)1692goto out_unlock;16931694if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {1695r = -EPERM;1696goto out_unlock;1697}16981699r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);1700} else {1701args->xnack_enabled = p->xnack_enabled;1702}17031704out_unlock:1705mutex_unlock(&p->mutex);17061707return r;1708}17091710static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1711{1712struct kfd_ioctl_svm_args *args = data;1713int r = 0;17141715pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",1716args->start_addr, args->size, args->op, args->nattr);17171718if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))1719return -EINVAL;1720if (!args->start_addr || !args->size)1721return -EINVAL;17221723r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,1724args->attrs);17251726return r;1727}1728#else1729static int kfd_ioctl_set_xnack_mode(struct file *filep,1730struct kfd_process *p, void *data)1731{1732return -EPERM;1733}1734static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1735{1736return -EPERM;1737}1738#endif17391740static int criu_checkpoint_process(struct kfd_process *p,1741uint8_t __user *user_priv_data,1742uint64_t *priv_offset)1743{1744struct kfd_criu_process_priv_data process_priv;1745int ret;17461747memset(&process_priv, 0, sizeof(process_priv));17481749process_priv.version = KFD_CRIU_PRIV_VERSION;1750/* For CR, we don't consider negative xnack mode which is used for1751* querying without changing it, here 0 simply means disabled and 11752* means enabled so retry for finding a valid PTE.1753*/1754process_priv.xnack_mode = p->xnack_enabled ? 
1 : 0;17551756ret = copy_to_user(user_priv_data + *priv_offset,1757&process_priv, sizeof(process_priv));17581759if (ret) {1760pr_err("Failed to copy process information to user\n");1761ret = -EFAULT;1762}17631764*priv_offset += sizeof(process_priv);1765return ret;1766}17671768static int criu_checkpoint_devices(struct kfd_process *p,1769uint32_t num_devices,1770uint8_t __user *user_addr,1771uint8_t __user *user_priv_data,1772uint64_t *priv_offset)1773{1774struct kfd_criu_device_priv_data *device_priv = NULL;1775struct kfd_criu_device_bucket *device_buckets = NULL;1776int ret = 0, i;17771778device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);1779if (!device_buckets) {1780ret = -ENOMEM;1781goto exit;1782}17831784device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);1785if (!device_priv) {1786ret = -ENOMEM;1787goto exit;1788}17891790for (i = 0; i < num_devices; i++) {1791struct kfd_process_device *pdd = p->pdds[i];17921793device_buckets[i].user_gpu_id = pdd->user_gpu_id;1794device_buckets[i].actual_gpu_id = pdd->dev->id;17951796/*1797* priv_data does not contain useful information for now and is reserved for1798* future use, so we do not set its contents.1799*/1800}18011802ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));1803if (ret) {1804pr_err("Failed to copy device information to user\n");1805ret = -EFAULT;1806goto exit;1807}18081809ret = copy_to_user(user_priv_data + *priv_offset,1810device_priv,1811num_devices * sizeof(*device_priv));1812if (ret) {1813pr_err("Failed to copy device information to user\n");1814ret = -EFAULT;1815}1816*priv_offset += num_devices * sizeof(*device_priv);18171818exit:1819kvfree(device_buckets);1820kvfree(device_priv);1821return ret;1822}18231824static uint32_t get_process_num_bos(struct kfd_process *p)1825{1826uint32_t num_of_bos = 0;1827int i;18281829/* Run over all PDDs of the process */1830for (i = 0; i < p->n_pdds; i++) {1831struct kfd_process_device *pdd = p->pdds[i];1832void *mem;1833int id;18341835idr_for_each_entry(&pdd->alloc_idr, mem, id) {1836struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;18371838if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)1839num_of_bos++;1840}1841}1842return num_of_bos;1843}18441845static int criu_get_prime_handle(struct kgd_mem *mem,1846int flags, u32 *shared_fd,1847struct file **file)1848{1849struct dma_buf *dmabuf;1850int ret;18511852ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1853if (ret) {1854pr_err("dmabuf export failed for the BO\n");1855return ret;1856}18571858ret = get_unused_fd_flags(flags);1859if (ret < 0) {1860pr_err("dmabuf create fd failed, ret:%d\n", ret);1861goto out_free_dmabuf;1862}18631864*shared_fd = ret;1865*file = dmabuf->file;1866return 0;18671868out_free_dmabuf:1869dma_buf_put(dmabuf);1870return ret;1871}18721873static void commit_files(struct file **files,1874struct kfd_criu_bo_bucket *bo_buckets,1875unsigned int count,1876int err)1877{1878while (count--) {1879struct file *file = files[count];18801881if (!file)1882continue;1883if (err) {1884fput(file);1885put_unused_fd(bo_buckets[count].dmabuf_fd);1886} else {1887fd_install(bo_buckets[count].dmabuf_fd, file);1888}1889}1890}18911892static int criu_checkpoint_bos(struct kfd_process *p,1893uint32_t num_bos,1894uint8_t __user *user_bos,1895uint8_t __user *user_priv_data,1896uint64_t *priv_offset)1897{1898struct kfd_criu_bo_bucket *bo_buckets;1899struct kfd_criu_bo_priv_data *bo_privs;1900struct file **files = NULL;1901int ret = 0, pdd_index, bo_index = 0, id;1902void 
*mem;19031904bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);1905if (!bo_buckets)1906return -ENOMEM;19071908bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);1909if (!bo_privs) {1910ret = -ENOMEM;1911goto exit;1912}19131914files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);1915if (!files) {1916ret = -ENOMEM;1917goto exit;1918}19191920for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {1921struct kfd_process_device *pdd = p->pdds[pdd_index];1922struct amdgpu_bo *dumper_bo;1923struct kgd_mem *kgd_mem;19241925idr_for_each_entry(&pdd->alloc_idr, mem, id) {1926struct kfd_criu_bo_bucket *bo_bucket;1927struct kfd_criu_bo_priv_data *bo_priv;1928int i, dev_idx = 0;19291930kgd_mem = (struct kgd_mem *)mem;1931dumper_bo = kgd_mem->bo;19321933/* Skip checkpointing BOs that are used for Trap handler1934* code and state. Currently, these BOs have a VA that1935* is less GPUVM Base1936*/1937if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)1938continue;19391940bo_bucket = &bo_buckets[bo_index];1941bo_priv = &bo_privs[bo_index];19421943bo_bucket->gpu_id = pdd->user_gpu_id;1944bo_bucket->addr = (uint64_t)kgd_mem->va;1945bo_bucket->size = amdgpu_bo_size(dumper_bo);1946bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;1947bo_priv->idr_handle = id;19481949if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {1950ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,1951&bo_priv->user_addr);1952if (ret) {1953pr_err("Failed to obtain user address for user-pointer bo\n");1954goto exit;1955}1956}1957if (bo_bucket->alloc_flags1958& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {1959ret = criu_get_prime_handle(kgd_mem,1960bo_bucket->alloc_flags &1961KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,1962&bo_bucket->dmabuf_fd, &files[bo_index]);1963if (ret)1964goto exit;1965} else {1966bo_bucket->dmabuf_fd = KFD_INVALID_FD;1967}19681969if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)1970bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |1971KFD_MMAP_GPU_ID(pdd->dev->id);1972else if (bo_bucket->alloc_flags &1973KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)1974bo_bucket->offset = KFD_MMAP_TYPE_MMIO |1975KFD_MMAP_GPU_ID(pdd->dev->id);1976else1977bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);19781979for (i = 0; i < p->n_pdds; i++) {1980if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))1981bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;1982}19831984pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"1985"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",1986bo_bucket->size,1987bo_bucket->addr,1988bo_bucket->offset,1989bo_bucket->gpu_id,1990bo_bucket->alloc_flags,1991bo_priv->idr_handle);1992bo_index++;1993}1994}19951996ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));1997if (ret) {1998pr_err("Failed to copy BO information to user\n");1999ret = -EFAULT;2000goto exit;2001}20022003ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));2004if (ret) {2005pr_err("Failed to copy BO priv information to user\n");2006ret = -EFAULT;2007goto exit;2008}20092010*priv_offset += num_bos * sizeof(*bo_privs);20112012exit:2013commit_files(files, bo_buckets, bo_index, ret);2014kvfree(files);2015kvfree(bo_buckets);2016kvfree(bo_privs);2017return ret;2018}20192020static int criu_get_process_object_info(struct kfd_process *p,2021uint32_t *num_devices,2022uint32_t *num_bos,2023uint32_t *num_objects,2024uint64_t *objs_priv_size)2025{2026uint64_t queues_priv_data_size, 
svm_priv_data_size, priv_size;2027uint32_t num_queues, num_events, num_svm_ranges;2028int ret;20292030*num_devices = p->n_pdds;2031*num_bos = get_process_num_bos(p);20322033ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);2034if (ret)2035return ret;20362037num_events = kfd_get_num_events(p);20382039svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);20402041*num_objects = num_queues + num_events + num_svm_ranges;20422043if (objs_priv_size) {2044priv_size = sizeof(struct kfd_criu_process_priv_data);2045priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);2046priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);2047priv_size += queues_priv_data_size;2048priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);2049priv_size += svm_priv_data_size;2050*objs_priv_size = priv_size;2051}2052return 0;2053}20542055static int criu_checkpoint(struct file *filep,2056struct kfd_process *p,2057struct kfd_ioctl_criu_args *args)2058{2059int ret;2060uint32_t num_devices, num_bos, num_objects;2061uint64_t priv_size, priv_offset = 0, bo_priv_offset;20622063if (!args->devices || !args->bos || !args->priv_data)2064return -EINVAL;20652066mutex_lock(&p->mutex);20672068if (!p->n_pdds) {2069pr_err("No pdd for given process\n");2070ret = -ENODEV;2071goto exit_unlock;2072}20732074/* Confirm all process queues are evicted */2075if (!p->queues_paused) {2076pr_err("Cannot dump process when queues are not in evicted state\n");2077/* CRIU plugin did not call op PROCESS_INFO before checkpointing */2078ret = -EINVAL;2079goto exit_unlock;2080}20812082ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);2083if (ret)2084goto exit_unlock;20852086if (num_devices != args->num_devices ||2087num_bos != args->num_bos ||2088num_objects != args->num_objects ||2089priv_size != args->priv_data_size) {20902091ret = -EINVAL;2092goto exit_unlock;2093}20942095/* each function will store private data inside priv_data and adjust priv_offset */2096ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);2097if (ret)2098goto exit_unlock;20992100ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,2101(uint8_t __user *)args->priv_data, &priv_offset);2102if (ret)2103goto exit_unlock;21042105/* Leave room for BOs in the private data. 
They need to be restored2106* before events, but we checkpoint them last to simplify the error2107* handling.2108*/2109bo_priv_offset = priv_offset;2110priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);21112112if (num_objects) {2113ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,2114&priv_offset);2115if (ret)2116goto exit_unlock;21172118ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,2119&priv_offset);2120if (ret)2121goto exit_unlock;21222123ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);2124if (ret)2125goto exit_unlock;2126}21272128/* This must be the last thing in this function that can fail.2129* Otherwise we leak dmabuf file descriptors.2130*/2131ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,2132(uint8_t __user *)args->priv_data, &bo_priv_offset);21332134exit_unlock:2135mutex_unlock(&p->mutex);2136if (ret)2137pr_err("Failed to dump CRIU ret:%d\n", ret);2138else2139pr_debug("CRIU dump ret:%d\n", ret);21402141return ret;2142}21432144static int criu_restore_process(struct kfd_process *p,2145struct kfd_ioctl_criu_args *args,2146uint64_t *priv_offset,2147uint64_t max_priv_data_size)2148{2149int ret = 0;2150struct kfd_criu_process_priv_data process_priv;21512152if (*priv_offset + sizeof(process_priv) > max_priv_data_size)2153return -EINVAL;21542155ret = copy_from_user(&process_priv,2156(void __user *)(args->priv_data + *priv_offset),2157sizeof(process_priv));2158if (ret) {2159pr_err("Failed to copy process private information from user\n");2160ret = -EFAULT;2161goto exit;2162}2163*priv_offset += sizeof(process_priv);21642165if (process_priv.version != KFD_CRIU_PRIV_VERSION) {2166pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",2167process_priv.version, KFD_CRIU_PRIV_VERSION);2168return -EINVAL;2169}21702171pr_debug("Setting XNACK mode\n");2172if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {2173pr_err("xnack mode cannot be set\n");2174ret = -EPERM;2175goto exit;2176} else {2177pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);2178p->xnack_enabled = process_priv.xnack_mode;2179}21802181exit:2182return ret;2183}21842185static int criu_restore_devices(struct kfd_process *p,2186struct kfd_ioctl_criu_args *args,2187uint64_t *priv_offset,2188uint64_t max_priv_data_size)2189{2190struct kfd_criu_device_bucket *device_buckets;2191struct kfd_criu_device_priv_data *device_privs;2192int ret = 0;2193uint32_t i;21942195if (args->num_devices != p->n_pdds)2196return -EINVAL;21972198if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)2199return -EINVAL;22002201device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);2202if (!device_buckets)2203return -ENOMEM;22042205ret = copy_from_user(device_buckets, (void __user *)args->devices,2206args->num_devices * sizeof(*device_buckets));2207if (ret) {2208pr_err("Failed to copy devices buckets from user\n");2209ret = -EFAULT;2210goto exit;2211}22122213for (i = 0; i < args->num_devices; i++) {2214struct kfd_node *dev;2215struct kfd_process_device *pdd;2216struct file *drm_file;22172218/* device private data is not currently used */22192220if (!device_buckets[i].user_gpu_id) {2221pr_err("Invalid user gpu_id\n");2222ret = -EINVAL;2223goto exit;2224}22252226dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);2227if (!dev) {2228pr_err("Failed to find device with gpu_id = %x\n",2229device_buckets[i].actual_gpu_id);2230ret = -EINVAL;2231goto 

		pdd = kfd_get_process_device_data(dev, p);
		if (!pdd) {
			pr_err("Failed to get pdd for gpu_id = %x\n",
			       device_buckets[i].actual_gpu_id);
			ret = -EINVAL;
			goto exit;
		}
		pdd->user_gpu_id = device_buckets[i].user_gpu_id;

		drm_file = fget(device_buckets[i].drm_fd);
		if (!drm_file) {
			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
			       device_buckets[i].drm_fd);
			ret = -EINVAL;
			goto exit;
		}

		if (pdd->drm_file) {
			ret = -EINVAL;
			goto exit;
		}

		/* create the vm using render nodes for kfd pdd */
		if (kfd_process_device_init_vm(pdd, drm_file)) {
			pr_err("could not init vm for given pdd\n");
			/* On success, the PDD keeps the drm_file reference */
			fput(drm_file);
			ret = -EINVAL;
			goto exit;
		}
		/*
		 * pdd already has the VM bound to the render node, so the call
		 * below won't create a new exclusive KFD mapping; it reuses the
		 * existing renderDXXX one. It is still needed for iommu v2
		 * binding and runtime pm.
		 */
		pdd = kfd_bind_process_to_device(dev, p);
		if (IS_ERR(pdd)) {
			ret = PTR_ERR(pdd);
			goto exit;
		}

		if (!pdd->qpd.proc_doorbells) {
			ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
			if (ret)
				goto exit;
		}
	}

	/*
	 * We are not copying device private data from user as we are not using the data for now,
	 * but we still adjust for its private data.
	 */
	*priv_offset += args->num_devices * sizeof(*device_privs);

exit:
	kfree(device_buckets);
	return ret;
}

static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
				      struct kfd_criu_bo_bucket *bo_bucket,
				      struct kfd_criu_bo_priv_data *bo_priv,
				      struct kgd_mem **kgd_mem)
{
	int idr_handle;
	int ret;
	const bool criu_resume = true;
	u64 offset;

	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (bo_bucket->size !=
		    kfd_doorbell_process_slice(pdd->dev->kfd))
			return -EINVAL;

		offset = kfd_get_process_doorbells(pdd);
		if (!offset)
			return -ENOMEM;
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		/* MMIO BOs need remapped bus address */
		if (bo_bucket->size != PAGE_SIZE) {
			pr_err("Invalid page size\n");
			return -EINVAL;
		}
		offset = pdd->dev->adev->rmmio_remap.bus_addr;
		if (!offset || (PAGE_SIZE > 4096)) {
			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
			return -ENOMEM;
		}
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		offset = bo_priv->user_addr;
	}
	/* Create the BO */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
						      bo_bucket->size, pdd->drm_priv, kgd_mem,
						      &offset, bo_bucket->alloc_flags, criu_resume);
	if (ret) {
		pr_err("Could not create the BO\n");
		return ret;
	}
	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
		 bo_bucket->size, bo_bucket->addr, offset);

	/* Restore previous IDR handle */
	pr_debug("Restoring old IDR handle for the BO");
	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
			       bo_priv->idr_handle + 1, GFP_KERNEL);

	if (idr_handle < 0) {
		pr_err("Could not allocate idr\n");
		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
						       NULL);
		return -ENOMEM;
	}

	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
		bo_bucket->restored_offset = offset;
	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		bo_bucket->restored_offset = offset;
		/* Update the VRAM usage count */
		atomic64_add(bo_bucket->size, &pdd->vram_usage);
	}
	return 0;
}
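
/*
 * Restore a single BO described by @bo_bucket/@bo_priv: recreate the memory on
 * its original GPU, re-establish its old IDR handle, re-map it on every GPU it
 * was mapped to at checkpoint time and, for VRAM and GTT allocations, export a
 * dmabuf fd back to user space.
 */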
static int criu_restore_bo(struct kfd_process *p,
			   struct kfd_criu_bo_bucket *bo_bucket,
			   struct kfd_criu_bo_priv_data *bo_priv,
			   struct file **file)
{
	struct kfd_process_device *pdd;
	struct kgd_mem *kgd_mem;
	int ret;
	int j;

	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
		 bo_priv->idr_handle);

	pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		return -ENODEV;
	}

	ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
	if (ret)
		return ret;

	/* now map these BOs to GPU/s */
	for (j = 0; j < p->n_pdds; j++) {
		struct kfd_node *peer;
		struct kfd_process_device *peer_pdd;

		if (!bo_priv->mapped_gpuids[j])
			break;

		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
		if (!peer_pdd)
			return -EINVAL;

		peer = peer_pdd->dev;

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd))
			return PTR_ERR(peer_pdd);

		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
							    peer_pdd->drm_priv);
		if (ret) {
			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
			return ret;
		}
	}

	pr_debug("map memory was successful for the BO\n");
	/* create the dmabuf object and export the bo */
	if (bo_bucket->alloc_flags
	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
					    &bo_bucket->dmabuf_fd, file);
		if (ret)
			return ret;
	} else {
		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
	}

	return 0;
}

static int criu_restore_bos(struct kfd_process *p,
			    struct kfd_ioctl_criu_args *args,
			    uint64_t *priv_offset,
			    uint64_t max_priv_data_size)
{
	struct kfd_criu_bo_bucket *bo_buckets = NULL;
	struct kfd_criu_bo_priv_data *bo_privs = NULL;
	struct file **files = NULL;
	int ret = 0;
	uint32_t i = 0;

	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
		return -EINVAL;

	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);

	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
	if (!bo_buckets)
		return -ENOMEM;

	files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
	if (!files) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
			     args->num_bos * sizeof(*bo_buckets));
	if (ret) {
		pr_err("Failed to copy BOs information from user\n");
		ret = -EFAULT;
		goto exit;
	}

	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
	if (!bo_privs) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
			     args->num_bos * sizeof(*bo_privs));
	if (ret) {
		pr_err("Failed to copy BOs information from user\n");
		ret = -EFAULT;
		goto exit;
	}
	*priv_offset += args->num_bos * sizeof(*bo_privs);

	/* Create and map new BOs */
	for (; i < args->num_bos; i++) {
		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
		if (ret) {
			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
			goto exit;
		}
	} /* done */

	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
	ret = copy_to_user((void __user *)args->bos,
			   bo_buckets,
			   (args->num_bos * sizeof(*bo_buckets)));
	if (ret)
		ret = -EFAULT;

exit:
	commit_files(files, bo_buckets, i, ret);
	kvfree(files);
	kvfree(bo_buckets);
	kvfree(bo_privs);
	return ret;
}

static int criu_restore_objects(struct file *filep,
				struct kfd_process *p,
				struct kfd_ioctl_criu_args *args,
				uint64_t *priv_offset,
				uint64_t max_priv_data_size)
{
	int ret = 0;
	uint32_t i;

	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));

	for (i = 0; i < args->num_objects; i++) {
		uint32_t object_type;

		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
			pr_err("Invalid private data size\n");
			return -EINVAL;
		}

		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
		if (ret) {
			pr_err("Failed to copy private information from user\n");
			goto exit;
		}

		switch (object_type) {
		case KFD_CRIU_OBJECT_TYPE_QUEUE:
			ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
						     priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		case KFD_CRIU_OBJECT_TYPE_EVENT:
			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
						     priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
						   priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
		default:
			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
			ret = -EINVAL;
			goto exit;
		}
	}
exit:
	return ret;
}

static int criu_restore(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	uint64_t priv_offset = 0;
	int ret = 0;

	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);

	if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
	    !args->priv_data_size || !args->num_devices)
		return -EINVAL;

	mutex_lock(&p->mutex);

	/*
	 * Set the process to evicted state to avoid running any new queues before all the memory
	 * mappings are ready.
	 */
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
	if (ret)
		goto exit_unlock;

	/* Each function will adjust priv_offset based on how many bytes it consumed */
	ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
	if (ret)
		goto exit_unlock;

	if (priv_offset != args->priv_data_size) {
		pr_err("Invalid private data size\n");
		ret = -EINVAL;
	}

exit_unlock:
	mutex_unlock(&p->mutex);
	if (ret)
		pr_err("Failed to restore CRIU ret:%d\n", ret);
	else
		pr_debug("CRIU restore successful\n");

	return ret;
}
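
/*
 * KFD_CRIU_OP_UNPAUSE: resume the queues that were evicted by the
 * PROCESS_INFO stage. Only valid while p->queues_paused is set.
 */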
static int criu_unpause(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	int ret;

	mutex_lock(&p->mutex);

	if (!p->queues_paused) {
		mutex_unlock(&p->mutex);
		return -EINVAL;
	}

	ret = kfd_process_restore_queues(p);
	if (ret)
		pr_err("Failed to unpause queues ret:%d\n", ret);
	else
		p->queues_paused = false;

	mutex_unlock(&p->mutex);

	return ret;
}

static int criu_resume(struct file *filep,
		       struct kfd_process *p,
		       struct kfd_ioctl_criu_args *args)
{
	struct kfd_process *target = NULL;
	struct pid *pid = NULL;
	int ret = 0;

	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
		 args->pid);

	pid = find_get_pid(args->pid);
	if (!pid) {
		pr_err("Cannot find pid info for %i\n", args->pid);
		return -ESRCH;
	}

	pr_debug("calling kfd_lookup_process_by_pid\n");
	target = kfd_lookup_process_by_pid(pid);

	put_pid(pid);

	if (!target) {
		pr_debug("Cannot find process info for %i\n", args->pid);
		return -ESRCH;
	}

	mutex_lock(&target->mutex);
	ret = kfd_criu_resume_svm(target);
	if (ret) {
		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
		goto exit;
	}

	ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
	if (ret)
		pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);

exit:
	mutex_unlock(&target->mutex);

	kfd_unref_process(target);
	return ret;
}

static int criu_process_info(struct file *filep,
			     struct kfd_process *p,
			     struct kfd_ioctl_criu_args *args)
{
	int ret = 0;

	mutex_lock(&p->mutex);

	if (!p->n_pdds) {
		pr_err("No pdd for given process\n");
		ret = -ENODEV;
		goto err_unlock;
	}

	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
	if (ret)
		goto err_unlock;

	p->queues_paused = true;

	args->pid = task_pid_nr_ns(p->lead_thread,
				   task_active_pid_ns(p->lead_thread));

	ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
					   &args->num_objects, &args->priv_data_size);
	if (ret)
		goto err_unlock;

	dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
		args->num_devices, args->num_bos, args->num_objects,
		args->priv_data_size);

err_unlock:
	if (ret) {
		kfd_process_restore_queues(p);
		p->queues_paused = false;
	}
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_criu_args *args = data;
	int ret;

	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
	switch (args->op) {
	case KFD_CRIU_OP_PROCESS_INFO:
		ret = criu_process_info(filep, p, args);
		break;
	case KFD_CRIU_OP_CHECKPOINT:
		ret = criu_checkpoint(filep, p, args);
		break;
	case KFD_CRIU_OP_UNPAUSE:
		ret = criu_unpause(filep, p, args);
		break;
	case KFD_CRIU_OP_RESTORE:
		ret = criu_restore(filep, p, args);
		break;
	case KFD_CRIU_OP_RESUME:
		ret = criu_resume(filep, p, args);
		break;
	default:
		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
		ret = -EINVAL;
		break;
	}

	if (ret)
		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);

	return ret;
}
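
/*
 * Record the runtime state (r_debug pointer, TTMP setup) for the process and,
 * where requested, enable the debug trap on each device. If a debugger is
 * already attached (p->debug_trap_enabled), raise EC_PROCESS_RUNTIME and wait
 * on runtime_enable_sema for it to acknowledge; the process mutex is dropped
 * while sleeping on the semaphore.
 */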
static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
			  bool enable_ttmp_setup)
{
	int i = 0, ret = 0;

	if (p->is_runtime_retry)
		goto retry;

	if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
		return -EBUSY;

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		if (pdd->qpd.queue_count)
			return -EEXIST;

		/*
		 * Setup TTMPs by default.
		 * Note that this call must remain here for MES ADD QUEUE to
		 * skip_process_ctx_clear unconditionally as the first call to
		 * SET_SHADER_DEBUGGER clears any stale process context data
		 * saved in MES.
		 */
		if (pdd->dev->kfd->shared_resources.enable_mes)
			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
	}

	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
	p->runtime_info.r_debug = r_debug;
	p->runtime_info.ttmp_setup = enable_ttmp_setup;

	if (p->runtime_info.ttmp_setup) {
		for (i = 0; i < p->n_pdds; i++) {
			struct kfd_process_device *pdd = p->pdds[i];

			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
				pdd->dev->kfd2kgd->enable_debug_trap(
						pdd->dev->adev,
						true,
						pdd->dev->vm_info.last_vmid_kfd);
			} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
				pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
						pdd->dev->adev,
						false,
						0);
			}
		}
	}

retry:
	if (p->debug_trap_enabled) {
		if (!p->is_runtime_retry) {
			kfd_dbg_trap_activate(p);
			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
					 p, NULL, 0, false, NULL, 0);
		}

		mutex_unlock(&p->mutex);
		ret = down_interruptible(&p->runtime_enable_sema);
		mutex_lock(&p->mutex);

		p->is_runtime_retry = !!ret;
	}

	return ret;
}

static int runtime_disable(struct kfd_process *p)
{
	int i = 0, ret;
	bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;

	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
	p->runtime_info.r_debug = 0;

	if (p->debug_trap_enabled) {
		if (was_enabled)
			kfd_dbg_trap_deactivate(p, false, 0);

		if (!p->is_runtime_retry)
			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
					 p, NULL, 0, false, NULL, 0);

		mutex_unlock(&p->mutex);
		ret = down_interruptible(&p->runtime_enable_sema);
		mutex_lock(&p->mutex);

		p->is_runtime_retry = !!ret;
		if (ret)
			return ret;
	}

	if (was_enabled && p->runtime_info.ttmp_setup) {
		for (i = 0; i < p->n_pdds; i++) {
			struct kfd_process_device *pdd = p->pdds[i];

			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
		}
	}

	p->runtime_info.ttmp_setup = false;

	/* disable ttmp setup */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
			pdd->spi_dbg_override =
				pdd->dev->kfd2kgd->disable_debug_trap(
					pdd->dev->adev,
					false,
					pdd->dev->vm_info.last_vmid_kfd);

			if (!pdd->dev->kfd->shared_resources.enable_mes)
				debug_refresh_runlist(pdd->dev->dqm);
			else
				kfd_dbg_set_mes_debug_mode(pdd,
							   !kfd_dbg_has_cwsr_workaround(pdd->dev));
		}
	}

	return 0;
}
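
/* Dispatch AMDKFD_IOC_RUNTIME_ENABLE to runtime_enable() or runtime_disable()
 * based on the mode mask, under the process mutex.
 */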
static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_runtime_enable_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)
		r = runtime_enable(p, args->r_debug,
				   !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK));
	else
		r = runtime_disable(p);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_trap_args *args = data;
	struct task_struct *thread = NULL;
	struct mm_struct *mm = NULL;
	struct pid *pid = NULL;
	struct kfd_process *target = NULL;
	struct kfd_process_device *pdd = NULL;
	int r = 0;

	if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Debugging does not support sched_policy %i", sched_policy);
		return -EINVAL;
	}

	pid = find_get_pid(args->pid);
	if (!pid) {
		pr_debug("Cannot find pid info for %i\n", args->pid);
		r = -ESRCH;
		goto out;
	}

	thread = get_pid_task(pid, PIDTYPE_PID);
	if (!thread) {
		r = -ESRCH;
		goto out;
	}

	mm = get_task_mm(thread);
	if (!mm) {
		r = -ESRCH;
		goto out;
	}

	if (args->op == KFD_IOC_DBG_TRAP_ENABLE) {
		bool create_process;

		rcu_read_lock();
		create_process = thread && thread != current && ptrace_parent(thread) == current;
		rcu_read_unlock();

		target = create_process ? kfd_create_process(thread) :
					  kfd_lookup_process_by_pid(pid);
	} else {
		target = kfd_lookup_process_by_pid(pid);
	}

	if (IS_ERR_OR_NULL(target)) {
		pr_debug("Cannot find process PID %i to debug\n", args->pid);
		r = target ? PTR_ERR(target) : -ESRCH;
		target = NULL;
		goto out;
	}

	/* Check if target is still PTRACED. */
	rcu_read_lock();
	if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
	    && ptrace_parent(target->lead_thread) != current) {
		pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
		r = -EPERM;
	}
	rcu_read_unlock();

	if (r)
		goto out;

	mutex_lock(&target->mutex);

	if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) {
		pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op);
		r = -EINVAL;
		goto unlock_out;
	}

	if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&
	    (args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||
	     args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||
	     args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||
	     args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||
	     args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
	     args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||
	     args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {
		r = -EPERM;
		goto unlock_out;
	}

	if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
	    args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {
		int user_gpu_id = kfd_process_get_user_gpu_id(target,
				args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
					args->set_node_address_watch.gpu_id :
					args->clear_node_address_watch.gpu_id);

		pdd = kfd_process_device_data_by_id(target, user_gpu_id);
		if (user_gpu_id == -EINVAL || !pdd) {
			r = -ENODEV;
			goto unlock_out;
		}
	}

	switch (args->op) {
	case KFD_IOC_DBG_TRAP_ENABLE:
		if (target != p)
			target->debugger_process = p;

		r = kfd_dbg_trap_enable(target,
					args->enable.dbg_fd,
					(void __user *)args->enable.rinfo_ptr,
					&args->enable.rinfo_size);
		if (!r)
			target->exception_enable_mask = args->enable.exception_mask;

		break;
	case KFD_IOC_DBG_TRAP_DISABLE:
		r = kfd_dbg_trap_disable(target);
		break;
	case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
		r = kfd_dbg_send_exception_to_runtime(target,
				args->send_runtime_event.gpu_id,
				args->send_runtime_event.queue_id,
				args->send_runtime_event.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
		kfd_dbg_set_enabled_debug_exception_mask(target,
				args->set_exceptions_enabled.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
		r = kfd_dbg_trap_set_wave_launch_override(target,
				args->launch_override.override_mode,
				args->launch_override.enable_mask,
				args->launch_override.support_request_mask,
				&args->launch_override.enable_mask,
				&args->launch_override.support_request_mask);
		break;
	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
		r = kfd_dbg_trap_set_wave_launch_mode(target,
				args->launch_mode.launch_mode);
		break;
	case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
		r = suspend_queues(target,
				args->suspend_queues.num_queues,
				args->suspend_queues.grace_period,
				args->suspend_queues.exception_mask,
				(uint32_t *)args->suspend_queues.queue_array_ptr);

		break;
	case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
		r = resume_queues(target, args->resume_queues.num_queues,
				(uint32_t *)args->resume_queues.queue_array_ptr);
		break;
	case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
		r = kfd_dbg_trap_set_dev_address_watch(pdd,
				args->set_node_address_watch.address,
				args->set_node_address_watch.mask,
				&args->set_node_address_watch.id,
				args->set_node_address_watch.mode);
		break;
	case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
		r = kfd_dbg_trap_clear_dev_address_watch(pdd,
				args->clear_node_address_watch.id);
		break;
	case KFD_IOC_DBG_TRAP_SET_FLAGS:
		r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
		break;
	case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
		r = kfd_dbg_ev_query_debug_event(target,
				&args->query_debug_event.queue_id,
				&args->query_debug_event.gpu_id,
				args->query_debug_event.exception_mask,
				&args->query_debug_event.exception_mask);
		break;
	case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
		r = kfd_dbg_trap_query_exception_info(target,
				args->query_exception_info.source_id,
				args->query_exception_info.exception_code,
				args->query_exception_info.clear_exception,
				(void __user *)args->query_exception_info.info_ptr,
				&args->query_exception_info.info_size);
		break;
	case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
		r = pqm_get_queue_snapshot(&target->pqm,
				args->queue_snapshot.exception_mask,
				(void __user *)args->queue_snapshot.snapshot_buf_ptr,
				&args->queue_snapshot.num_queues,
				&args->queue_snapshot.entry_size);
		break;
	case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
		r = kfd_dbg_trap_device_snapshot(target,
				args->device_snapshot.exception_mask,
				(void __user *)args->device_snapshot.snapshot_buf_ptr,
				&args->device_snapshot.num_devices,
				&args->device_snapshot.entry_size);
		break;
	default:
		pr_err("Invalid option: %i\n", args->op);
		r = -EINVAL;
	}

unlock_out:
	mutex_unlock(&target->mutex);

out:
	if (thread)
		put_task_struct(thread);

	if (mm)
		mmput(mm);

	if (pid)
		put_pid(pid);

	if (target)
		kfd_unref_process(target);

	return r;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}
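
/*
 * Each AMDKFD_IOCTL_DEF() entry lands at index _IOC_NR(ioctl) in the table
 * below; kfd_ioctl() uses the same number to look up the handler and its
 * flags.
 */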
/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
			kfd_ioctl_get_dmabuf_info, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
			kfd_ioctl_import_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
			kfd_ioctl_alloc_queue_gws, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			kfd_ioctl_smi_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			kfd_ioctl_set_xnack_mode, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
			kfd_ioctl_get_available_memory, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
			kfd_ioctl_export_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
			kfd_ioctl_runtime_enable, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
			kfd_ioctl_set_debug_trap, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
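
/*
 * Common ioctl entry point: look up the handler in amdkfd_ioctls[], verify the
 * caller owns this /dev/kfd file (or is a ptrace-attached checkpoint/restore
 * tool with CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN), copy the argument block
 * in and out of the kernel, and call the handler.
 */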
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;
	bool ptrace_attached = false;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
		retcode = -ENOTTY;
		goto err_i1;
	}

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else {
		retcode = -ENOTTY;
		goto err_i1;
	}

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);

	/* Get the process struct from the filep. Only the process
	 * that opened /dev/kfd can use the file descriptor. Child
	 * processes need to create their own KFD device context.
	 */
	process = filep->private_data;

	rcu_read_lock();
	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    ptrace_parent(process->lead_thread) == current)
		ptrace_attached = true;
	rcu_read_unlock();

	if (process->lead_thread != current->group_leader
	    && !ptrace_attached) {
		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
		retcode = -EBADF;
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	/*
	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
	 * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN is
	 * set, as it is the more privileged capability.
	 */
	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
		if (!capable(CAP_CHECKPOINT_RESTORE) &&
		    !capable(CAP_SYS_ADMIN)) {
			retcode = -EACCES;
			goto err_i1;
		}
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
			nr, arg, retcode);

	return retcode;
}

static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
			 struct vm_area_struct *vma)
{
	phys_addr_t address;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	if (PAGE_SIZE > 4096)
		return -EINVAL;

	address = dev->adev->rmmio_remap.bus_addr;

	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
			  VM_DONTDUMP | VM_PFNMAP);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	pr_debug("process pid %d mapping mmio page\n"
		 " target user address == 0x%08llX\n"
		 " physical address == 0x%08llX\n"
		 " vm_flags == 0x%04lX\n"
		 " size == 0x%04lX\n",
		 process->lead_thread->pid, (unsigned long long) vma->vm_start,
		 address, vma->vm_flags, PAGE_SIZE);

	return io_remap_pfn_range(vma,
				  vma->vm_start,
				  address >> PAGE_SHIFT,
				  PAGE_SIZE,
				  vma->vm_page_prot);
}
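
/*
 * The mmap offset encodes a mapping type (doorbell, events, reserved memory or
 * MMIO) and, where needed, a GPU ID; decode it and hand the VMA to the
 * matching helper.
 */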
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_node *dev = NULL;
	unsigned long mmap_offset;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	case KFD_MMAP_TYPE_MMIO:
		if (!dev)
			return -ENODEV;
		return kfd_mmio_mmap(dev, process, vma);
	}

	return -EFAULT;
}