Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <[email protected]>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>
#include <linux/hmm.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>

#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
				 struct amdgpu_device *adev,
				 struct drm_file *filp,
				 union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	if (cs->in.num_chunks == 0)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->adev = adev;
	p->filp = filp;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx)
		return -EINVAL;

	if (atomic_read(&p->ctx->guilty)) {
		amdgpu_ctx_put(p->ctx);
		return -ECANCELED;
	}

	amdgpu_sync_create(&p->sync);
	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	return 0;
}

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not, increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
		return -EINVAL;

	++(num_ibs[r]);
	p->gang_leader_idx = r;
	return 0;
}

static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_cs_chunk_fence *data,
				   uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	size = amdgpu_bo_size(p->uf_bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}

static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_bo_list_in *data)
{
	struct drm_amdgpu_bo_list_entry *info;
	int r;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	kvfree(info);

	return r;
}

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array;
	uint32_t uf_offset = 0;
	size_t size;
	int ret;
	int i;

	chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
					cs->in.num_chunks,
					sizeof(uint64_t));
	if (IS_ERR(chunk_array))
		return PTR_ERR(chunk_array);

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				   GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;

		p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
							size,
							sizeof(uint32_t));
		if (IS_ERR(p->chunks[i].kdata)) {
			ret = PTR_ERR(p->chunks[i].kdata);
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);

		/* Assume the worst on the following checks */
		ret = -EINVAL;
		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
						      &uf_offset);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			if (size < sizeof(struct drm_amdgpu_bo_list_in))
				goto free_partial_kdata;

			/* Only a single BO list is allowed to simplify handling. */
			if (p->bo_list)
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			break;

		default:
			goto free_partial_kdata;
		}
	}

	if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
		ret = -EINVAL;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
				       num_ibs[i], &p->jobs[i],
				       p->filp->client_id);
		if (ret)
			goto free_all_kdata;
		switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
		case AMDGPU_ENFORCE_ISOLATION_DISABLE:
		default:
			p->jobs[i]->enforce_isolation = false;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		case AMDGPU_ENFORCE_ISOLATION_ENABLE:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = true;
			break;
		case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		}
	}
	p->gang_leader = p->jobs[p->gang_leader_idx];

	if (p->ctx->generation != p->gang_leader->generation) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_bo)
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}

static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

	/* submissions to kernel queues are disabled */
	if (ring->no_user_submission)
		return -EINVAL;

	/* MM engine doesn't support user fences */
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
			(*ce_preempt)++;
		else
			(*de_preempt)++;

		/* Each GFX command submit allows only 1 IB max
		 * preemptible for CE & DE */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

	r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			  chunk_ib->ib_bytes : 0,
			  AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
		drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");
		return r;
	}

	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;
	return 0;
}

static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
			handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
	dma_fence_put(fence);
	return r;
}

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
				   struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
					      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
						  syncobj_deps[i].point,
						  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
			       struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
	int i;

	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		p->jobs[i]->shadow_va = shadow->shadow_va;
		p->jobs[i]->csa_va = shadow->csa_va;
		p->jobs[i]->gds_va = shadow->gds_va;
		p->jobs[i]->init_shadow =
			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
	}

	return 0;
}

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			r = amdgpu_cs_p2_shadow(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *obj;
	unsigned long index;
	unsigned int i;
	int r;

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	mutex_lock(&p->bo_list->bo_list_mutex);

	/* Get userptr backing pages. If pages are updated after being registered
	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		bool userpage_invalidated = false;
		struct amdgpu_bo *bo = e->bo;

		r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
		if (r)
			goto out_free_user_pages;

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	drm_exec_until_all_locked(&p->exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
		drm_exec_retry_on_contention(&p->exec);
		if (unlikely(r))
			goto out_free_user_pages;

		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* One fence for TTM and one for each CS job */
			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;

			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
		}

		if (p->uf_bo) {
			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;
		}
	}

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
		if (usermm && usermm != current->mm) {
			r = -EPERM;
			goto out_free_user_pages;
		}

		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
		    e->user_invalidated) {
			amdgpu_bo_placement_from_domain(e->bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
					    &ctx);
			if (r)
				goto out_free_user_pages;

			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
						     e->range);
		}
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
			       amdgpu_cs_bo_validate, p);
	if (r) {
		drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
		goto out_free_user_pages;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
		if (unlikely(r))
			goto out_free_user_pages;
	}

	if (p->uf_bo) {
		r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
		if (unlikely(r))
			goto out_free_user_pages;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

out_free_user_pages:
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->bo;

		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
		e->range = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
	int i, j;

	if (!trace_amdgpu_cs_enabled())
		return;

	for (i = 0; i < p->gang_size; ++i) {
		struct amdgpu_job *job = p->jobs[i];

		for (j = 0; j < job->num_ibs; ++j)
			trace_amdgpu_cs(p, job, &job->ibs[j]);
	}
}

static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
			       struct amdgpu_job *job)
{
	struct amdgpu_ring *ring = amdgpu_job_ring(job);
	unsigned int i;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
		return 0;

	for (i = 0; i < job->num_ibs; ++i) {
		struct amdgpu_ib *ib = &job->ibs[i];
		struct amdgpu_bo_va_mapping *m;
		struct amdgpu_bo *aobj;
		uint64_t va_start;
		uint8_t *kptr;

		va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
		if (r) {
			drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
			return r;
		}

		if ((va_start + ib->length_dw * 4) >
		    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
			drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
			return -EINVAL;
		}

		/* the IB should be reserved at this point */
		r = amdgpu_bo_kmap(aobj, (void **)&kptr);
		if (r)
			return r;

		kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

		if (ring->funcs->parse_cs) {
			memcpy(ib->ptr, kptr, ib->length_dw * 4);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, job, ib);
			if (r)
				return r;

			if (ib->sa_bo)
				ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
		} else {
			ib->ptr = (uint32_t *)kptr;
			r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
			amdgpu_bo_kunmap(aobj);
			if (r)
				return r;
		}
	}

	return 0;
}

static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
{
	unsigned int i;
	int r;

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	/*
	 * We can't use gang submit with reserved VMIDs when the VM changes
	 * can't be invalidated by more than one engine at the same time.
	 */
	if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
		for (i = 0; i < p->gang_size; ++i) {
			struct drm_sched_entity *entity = p->entities[i];
			struct drm_gpu_scheduler *sched = entity->rq->sched;
			struct amdgpu_ring *ring = to_amdgpu_ring(sched);

			if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
				return -EINVAL;
		}
	}

	if (!amdgpu_vm_ready(vm))
		return -EINVAL;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
			      GFP_KERNEL);
	if (r)
		return r;

	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	/* FIXME: In theory this loop shouldn't be needed any more when
	 * amdgpu_vm_handle_moved handles all moved BOs that are reserved
	 * with p->ticket. But removing it caused test regressions, so I'm
	 * leaving it here for now.
	 */
	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
	if (r)
		return r;

	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (adev->debug_vm) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->bo;

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(bo, false);
		}
	}

	return 0;
}

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_gpu_scheduler *sched;
	struct drm_gem_object *obj;
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;
	int r;

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
	if (r) {
		if (r != -ERESTARTSYS)
			drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
		return r;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
		if (r)
			return r;
	}

	sched = p->gang_leader->base.entity->rq->sched;
	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

		/*
		 * When we have a dependency it might be necessary to insert a
		 * pipeline sync to make sure that all caches etc are flushed and the
		 * next job actually sees the results from the previous one
		 * before we start executing on the same scheduler ring.
		 */
		if (!s_fence || s_fence->sched != sched) {
			dma_fence_put(fence);
			continue;
		}

		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
				      GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *gobj;
	unsigned long index;
	unsigned int i;
	uint64_t seq;
	int r;

	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	for (i = 0; i < p->gang_size; ++i) {
		struct dma_fence *fence;

		if (p->jobs[i] == leader)
			continue;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&leader->base, fence);
		if (r) {
			dma_fence_put(fence);
			return r;
		}
	}

	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
							e->range);
		e->range = NULL;
	}
	if (r) {
		r = -EAGAIN;
		mutex_unlock(&p->adev->notifier_lock);
		return r;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	drm_exec_for_each_locked_object(&p->exec, index, gobj) {

		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < p->gang_size; ++i) {
			if (p->jobs[i] == leader)
				continue;

			dma_resv_add_fence(gobj->resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

		/* The gang leader is remembered as writer */
		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
	}

	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	leader->uf_sequence = seq;

	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return 0;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned int i;

	amdgpu_sync_free(&parser->sync);
	drm_exec_fini(&parser->exec);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	amdgpu_bo_unref(&parser->uf_bo);
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	int r;

	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	r = amdgpu_cs_parser_init(&parser, adev, filp, data);
	if (r) {
		drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto error_fini;

	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			drm_err(dev, "Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
		goto error_fini;
	}

	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_submit(&parser, data);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}

int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	if (!fence)
		fence = dma_fence_get_stub();

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		dma_fence_put(fence);
		return -EINVAL;
	}
}

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;

		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
				   wait->in.fence_count,
				   sizeof(struct drm_amdgpu_fence));
	if (IS_ERR(fences))
		return PTR_ERR(fences);

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

	kfree(fences);

	return r;
}

/**
 * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns 0 and fills in @bo and @map when the
 * mapping is found, an error code otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int i, r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
		return -EINVAL;

	/* Make sure VRAM is allocated contiguously */
	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
	    !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {

		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		for (i = 0; i < (*bo)->placement.num_placement; i++)
			(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}