// SPDX-License-Identifier: GPL-2.0-only
/*
 * FP/SIMD context switching and fault handling
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <[email protected]>
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>

/* FPEXC32_EL2 exception-enable bits (AArch32 FP exception control). */
#define FPEXC_IOF	(1 << 0)
#define FPEXC_DZF	(1 << 1)
#define FPEXC_OFF	(1 << 2)
#define FPEXC_UFF	(1 << 3)
#define FPEXC_IXF	(1 << 4)
#define FPEXC_IDF	(1 << 7)

/*
 * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
 *
 * In order to reduce the number of times the FPSIMD state is needlessly saved
 * and restored, we need to keep track of two things:
 * (a) for each task, we need to remember which CPU was the last one to have
 *     the task's FPSIMD state loaded into its FPSIMD registers;
 * (b) for each CPU, we need to remember which task's userland FPSIMD state has
 *     been loaded into its FPSIMD registers most recently, or whether it has
 *     been used to perform kernel mode NEON in the meantime.
 *
 * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
 * the id of the current CPU every time the state is loaded onto a CPU. For (b),
 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
 * address of the userland FPSIMD state of the task that was loaded onto the CPU
 * the most recently, or NULL if kernel mode NEON has been performed after that.
 *
 * With this in place, we no longer have to restore the next FPSIMD state right
 * when switching between tasks. Instead, we can defer this check to userland
 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
 * task's fpsimd_cpu are still mutually in sync. If this is the case, we
 * can omit the FPSIMD restore.
 *
 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
 * indicate whether or not the userland FPSIMD state of the current task is
 * present in the registers. The flag is set unless the FPSIMD registers of this
 * CPU currently contain the most recent userland FPSIMD state of the current
 * task. If the task is behaving as a VMM, then this will be managed by
 * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
 * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
 * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
 * flag the register state as invalid.
 *
 * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
 * called from softirq context, which will save the task's FPSIMD context back
 * to task_struct.
 * To prevent this from racing with the manipulation of the
 * task's FPSIMD state from task context and thereby corrupting the state, it
 * is necessary to protect any manipulation of a task's fpsimd_state or
 * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
 * softirq servicing entirely until put_cpu_fpsimd_context() is called.
 *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_cpu field
 *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
 *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
 *   cleared, otherwise it is set;
 *
 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
 *   userland FPSIMD state is copied from memory to the registers, the task's
 *   fpsimd_cpu field is set to the id of the current CPU, the current
 *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
 *   TIF_FOREIGN_FPSTATE flag is cleared;
 *
 * - the task executes an ordinary syscall; upon return to userland, the
 *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
 *   restored;
 *
 * - the task executes a syscall which executes some NEON instructions; this is
 *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
 *   register contents to memory, clears the fpsimd_last_state per-cpu variable
 *   and sets the TIF_FOREIGN_FPSTATE flag;
 *
 * - the task gets preempted after kernel_neon_end() is called; as we have not
 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */

/* Per-CPU record of the context last bound to this CPU's FP/SIMD registers. */
DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

/*
 * Per-vector-type properties; min/max VLs for SVE start at the architectural
 * minimum and are refined during boot-time probing (see sve_setup()).
 */
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
	[ARM64_VEC_SVE] = {
		.type			= ARM64_VEC_SVE,
		.name			= "SVE",
		.min_vl			= SVE_VL_MIN,
		.max_vl			= SVE_VL_MIN,
		.max_virtualisable_vl	= SVE_VL_MIN,
	},
#endif
#ifdef CONFIG_ARM64_SME
	[ARM64_VEC_SME] = {
		.type			= ARM64_VEC_SME,
		.name			= "SME",
	},
#endif
};

/* Map a vector type to the TIF_ flag recording VL inheritance across exec. */
static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
	switch (type) {
	case ARM64_VEC_SVE:
		return TIF_SVE_VL_INHERIT;
	case ARM64_VEC_SME:
		return TIF_SME_VL_INHERIT;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

struct vl_config {
	int __default_vl;		/* Default VL for tasks */
};

static struct vl_config vl_config[ARM64_VEC_MAX];

static inline int get_default_vl(enum vec_type type)
{
	return READ_ONCE(vl_config[type].__default_vl);
}

#ifdef CONFIG_ARM64_SVE

static inline int get_sve_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SVE);
}

static inline void set_default_vl(enum vec_type type, int val)
{
	WRITE_ONCE(vl_config[type].__default_vl, val);
}

static inline void set_sve_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SVE, val);
}

static u8 *efi_sve_state;

#else /* ! CONFIG_ARM64_SVE */

/* Dummy declaration for code that will be optimised out: */
extern u8 *efi_sve_state;

#endif /* ! CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

static int get_sme_default_vl(void)
{
	return get_default_vl(ARM64_VEC_SME);
}

static void set_sme_default_vl(int val)
{
	set_default_vl(ARM64_VEC_SME, val);
}

static void sme_free(struct task_struct *);

#else

static inline void sme_free(struct task_struct *t) { }

#endif

static void fpsimd_bind_task_to_cpu(void);

/*
 * Claim ownership of the CPU FPSIMD context for use by the calling context.
 *
 * The caller may freely manipulate the FPSIMD context metadata until
 * put_cpu_fpsimd_context() is called.
 *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible.
 * Disabling preemption is the right choice here as bottom
 * half processing is always in thread context on RT kernels so it
 * implicitly prevents bottom half processing as well.
 */
static void get_cpu_fpsimd_context(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_disable();
	else
		preempt_disable();
}

/*
 * Release the CPU FPSIMD context.
 *
 * Must be called from a context in which get_cpu_fpsimd_context() was
 * previously called, with no call to put_cpu_fpsimd_context() in the
 * meantime.
 */
static void put_cpu_fpsimd_context(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_enable();
	else
		preempt_enable();
}

/* Current vector length of @task for vector type @type. */
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
	return task->thread.vl[type];
}

void task_set_vl(struct task_struct *task, enum vec_type type,
		 unsigned long vl)
{
	task->thread.vl[type] = vl;
}

/* Vector length that will take effect for @type at the task's next exec. */
unsigned int task_get_vl_onexec(const struct task_struct *task,
				enum vec_type type)
{
	return task->thread.vl_onexec[type];
}

void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
			unsigned long vl)
{
	task->thread.vl_onexec[type] = vl;
}

/*
 * TIF_SME controls whether a task can use SME without trapping while
 * in userspace, when TIF_SME is set then we must have storage
 * allocated in sve_state and sme_state to store the contents of both ZA
 * and the SVE registers for both streaming and non-streaming modes.
 *
 * If both SVCR.ZA and SVCR.SM are disabled then at any point we
 * may disable TIF_SME and reenable traps.
 */

/*
 * TIF_SVE controls whether a task can use SVE without trapping while
 * in userspace, and also (together with TIF_SME) the way a task's
 * FPSIMD/SVE state is stored in thread_struct.
 *
 * The kernel uses this flag to track whether a user task is actively
 * using SVE, and therefore whether full SVE register state needs to
 * be tracked. If not, the cheaper FPSIMD context handling code can
 * be used instead of the more costly SVE equivalents.
 *
 *  * TIF_SVE or SVCR.SM set:
 *
 *    The task can execute SVE instructions while in userspace without
 *    trapping to the kernel.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 *  * TIF_SVE clear:
 *
 *    An attempt by the user task to execute an SVE instruction causes
 *    do_sve_acc() to be called, which does some preparation and then
 *    sets TIF_SVE.
 *
 *    During any syscall, the kernel may optionally clear TIF_SVE and
 *    discard the vector state except for the FPSIMD subset.
 *
 * The data will be stored in one of two formats:
 *
 *  * FPSIMD only - FP_STATE_FPSIMD:
 *
 *    When the FPSIMD only state stored task->thread.fp_type is set to
 *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
 *    logically zero but not stored anywhere; P0-P15 and FFR are not
 *    stored and have unspecified values from userspace's point of
 *    view. For hygiene purposes, the kernel zeroes them on next use,
 *    but userspace is discouraged from relying on this.
 *
 *    task->thread.sve_state does not need to be non-NULL, valid or any
 *    particular size: it must not be dereferenced and any data stored
 *    there should be considered stale and not referenced.
 *
 *  * SVE state - FP_STATE_SVE:
 *
 *    When the full SVE state is stored task->thread.fp_type is set to
 *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
 *    corresponding Zn), P0-P15 and FFR are encoded in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl or, if SVCR.SM is set,
 *    task->thread.sme_vl.
 *    The storage for the vector registers in
 *    task->thread.uw.fpsimd_state should be ignored.
 *
 *    task->thread.sve_state must point to a valid buffer at least
 *    sve_state_size(task) bytes in size. The data stored in
 *    task->thread.uw.fpsimd_state.vregs should be considered stale
 *    and not referenced.
 *
 *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *    irrespective of whether TIF_SVE is clear or set, since these are
 *    not vector length dependent.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 */
static void task_fpsimd_load(void)
{
	bool restore_sve_regs = false;
	/* Only meaningful on the restore_sve_regs path; set on all such paths. */
	bool restore_ffr;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());
	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));

	if (system_supports_sve() || system_supports_sme()) {
		switch (current->thread.fp_type) {
		case FP_STATE_FPSIMD:
			/* Stop tracking SVE for this task until next use. */
			clear_thread_flag(TIF_SVE);
			break;
		case FP_STATE_SVE:
			/*
			 * Saved SVE-format state with SM clear implies the
			 * task was using SVE proper: TIF_SVE should be set.
			 */
			if (!thread_sm_enabled(&current->thread))
				WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));

			if (test_thread_flag(TIF_SVE))
				sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);

			restore_sve_regs = true;
			restore_ffr = true;
			break;
		default:
			/*
			 * This indicates either a bug in
			 * fpsimd_save_user_state() or memory corruption, we
			 * should always record an explicit format
			 * when we save. We always at least have the
			 * memory allocated for FPSIMD registers so
			 * try that and hope for the best.
			 */
			WARN_ON_ONCE(1);
			clear_thread_flag(TIF_SVE);
			break;
		}
	}

	/* Restore SME, override SVE register configuration if needed */
	if (system_supports_sme()) {
		unsigned long sme_vl = task_get_sme_vl(current);

		/* Ensure VL is set up for restoring data */
		if (test_thread_flag(TIF_SME))
			sme_set_vq(sve_vq_from_vl(sme_vl) - 1);

		write_sysreg_s(current->thread.svcr, SYS_SVCR);

		if (thread_za_enabled(&current->thread))
			sme_load_state(current->thread.sme_state,
				       system_supports_sme2());

		/* In streaming mode, FFR only exists if FEAT_SME_FA64 does. */
		if (thread_sm_enabled(&current->thread))
			restore_ffr = system_supports_fa64();
	}

	if (system_supports_fpmr())
		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);

	if (restore_sve_regs) {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       restore_ffr);
	} else {
		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
	}
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers. Note carefully that the
 * current context is the context last bound to the CPU stored in
 * last, if KVM is involved this may be the guest VM context rather
 * than the host thread for the VM pointed to by current.
 * This means
 * that we must always reference the state storage via last rather
 * than via current, if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
static void fpsimd_save_user_state(void)
{
	struct cpu_fp_state const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
	bool save_sve_regs = false;
	bool save_ffr;
	unsigned int vl;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(preemptible());

	/* Registers do not hold this task's state: nothing to write back. */
	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

	if (system_supports_fpmr())
		*(last->fpmr) = read_sysreg_s(SYS_FPMR);

	/*
	 * Save SVE state if it is live.
	 *
	 * The syscall ABI discards live SVE state at syscall entry. When
	 * entering a syscall, fpsimd_syscall_enter() sets to_save to
	 * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
	 * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
	 * called prior to a return to userspace.
	 */
	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
	    last->to_save == FP_STATE_SVE) {
		save_sve_regs = true;
		save_ffr = true;
		vl = last->sve_vl;
	}

	if (system_supports_sme()) {
		u64 *svcr = last->svcr;

		*svcr = read_sysreg_s(SYS_SVCR);

		if (*svcr & SVCR_ZA_MASK)
			sme_save_state(last->sme_state,
				       system_supports_sme2());

		/* If we are in streaming mode override regular SVE. */
		if (*svcr & SVCR_SM_MASK) {
			save_sve_regs = true;
			save_ffr = system_supports_fa64();
			vl = last->sme_vl;
		}
	}

	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
		/* Get the configured VL from RDVL, will account for SM */
		if (WARN_ON(sve_get_vl() != vl)) {
			/*
			 * Can't save the user regs, so current would
			 * re-enter user with corrupt state.
			 * There's no way to recover, so kill it:
			 */
			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
			return;
		}

		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(vl),
			       &last->st->fpsr, save_ffr);
		*last->fp_type = FP_STATE_SVE;
	} else {
		fpsimd_save_state(last->st);
		*last->fp_type = FP_STATE_FPSIMD;
	}
}

/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */
static unsigned int find_supported_vector_length(enum vec_type type,
						 unsigned int vl)
{
	struct vl_info *info = &vl_info[type];
	int bit;
	int max_vl = info->max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = info->min_vl;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = info->min_vl;

	if (vl > max_vl)
		vl = max_vl;
	if (vl < info->min_vl)
		vl = info->min_vl;

	/*
	 * The vq_map bitmap is indexed with the largest VQ first, so this
	 * rounds vl down to the nearest supported vector length.
	 */
	bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}

#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)

/* Shared sysctl handler for the SVE and SME default-VL files. */
static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct vl_info *info = table->extra1;
	enum vec_type type = info->type;
	int ret;
	int vl = get_default_vl(type);
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if (vl == -1)
		vl = info->max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	set_default_vl(type, find_supported_vector_length(type, vl));
	return 0;
}

static const struct ctl_table sve_default_vl_table[] = {
	{
		.procname	= "sve_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SVE],
	},
};

static int __init sve_sysctl_init(void)
{
	if (system_supports_sve())
		if (!register_sysctl("abi", sve_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */

#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
static const struct ctl_table sme_default_vl_table[] = {
	{
		.procname	= "sme_default_vector_length",
		.mode		= 0644,
		.proc_handler	= vec_proc_do_default_vl,
		.extra1		= &vl_info[ARM64_VEC_SME],
	},
};

static int __init sme_sysctl_init(void)
{
	if (system_supports_sme())
		if (!register_sysctl("abi", sme_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
static int __init sme_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */

/* Address of Zn's storage within an SVE register dump, for vector quadwords vq. */
#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

#ifdef CONFIG_CPU_BIG_ENDIAN
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	u64 a = swab64(x);
	u64 b = swab64(x >> 64);

	return ((__uint128_t)a << 64) | b;
}
#else
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	return x;
}
#endif

/* Byte-swapping a 128-bit quantity is an involution: reuse the same helper. */
#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)

static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
			    unsigned int vq)
{
	unsigned int i;
	__uint128_t *p;

	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t *)ZREG(sst, vq, i);
		*p = arm64_cpu_to_le128(fst->vregs[i]);
	}
}

/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
 *
 * Task can be a non-runnable task, or current. In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must be up to date before calling this
 * function.
 */
static inline void fpsimd_to_sve(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
	__fpsimd_to_sve(sst, fst, vq);
}

/*
 * Transfer the SVE state in task->thread.sve_state to
 * task->thread.uw.fpsimd_state.
 *
 * Task can be a non-runnable task, or current.
 * In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
static inline void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq, vl;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve() && !system_supports_sme())
		return;

	vl = thread_get_cur_vl(&task->thread);
	vq = sve_vq_from_vl(vl);
	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}

static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
	memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}

/*
 * Simulate the effects of an SMSTOP SM instruction.
 */
void task_smstop_sm(struct task_struct *task)
{
	if (!thread_sm_enabled(&task->thread))
		return;

	__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
	/* Architected FPSR reset value on exiting streaming mode. */
	task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
	if (system_supports_fpmr())
		task->thread.uw.fpmr = 0;

	task->thread.svcr &= ~SVCR_SM_MASK;
	task->thread.fp_type = FP_STATE_FPSIMD;
}

void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
		       SYS_SCTLR_EL1);
}

#ifdef CONFIG_ARM64_SVE
static void sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data. The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability: except in the
 * do_sve_acc() case, there is no ABI requirement to hide stale data
 * written previously by the task.
 */
void sve_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sve_state) {
		if (flush)
			memset(task->thread.sve_state, 0,
			       sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);
}

/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
 * task's currently effective FPSIMD/SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_from_effective_state(struct task_struct *task)
{
	if (task->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(task);
}

/*
 * Ensure that the task's currently effective FPSIMD/SVE state is up to date
 * with respect to task->thread.uw.fpsimd_state, zeroing any effective
 * non-FPSIMD (S)SVE state.
 *
 * The task's FPSIMD/SVE/SME state must not be subject to concurrent
 * manipulation.
 */
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (task->thread.fp_type != FP_STATE_SVE)
		return;

	vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
	__fpsimd_to_sve(sst, fst, vq);
}

/*
 * Change the live vector length of @task for vector type @type, preserving
 * the effective FPSIMD state and any live ZA state. Returns 0 or -ENOMEM.
 */
static int change_live_vector_length(struct task_struct *task,
				     enum vec_type type,
				     unsigned long vl)
{
	unsigned int sve_vl = task_get_sve_vl(task);
	unsigned int sme_vl = task_get_sme_vl(task);
	void *sve_state = NULL, *sme_state = NULL;

	if (type == ARM64_VEC_SME)
		sme_vl = vl;
	else
		sve_vl = vl;

	/*
	 * Allocate the new sve_state and sme_state before freeing the old
	 * copies so that allocation failure can be handled without needing to
	 * mutate the task's state in any way.
	 *
	 * Changes to the SVE vector length must not discard live ZA state or
	 * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
	 * ZA lazy saving scheme may attempt to change the SVE vector length
	 * while unsaved/dormant ZA state exists.
	 */
	sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
	if (!sve_state)
		goto out_mem;

	if (type == ARM64_VEC_SME) {
		sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
		if (!sme_state)
			goto out_mem;
	}

	if (task == current)
		fpsimd_save_and_flush_current_state();
	else
		fpsimd_flush_task_state(task);

	/*
	 * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
	 * other SVE state.
	 */
	fpsimd_sync_from_effective_state(task);
	task_set_vl(task, type, vl);
	kfree(task->thread.sve_state);
	task->thread.sve_state = sve_state;
	fpsimd_sync_to_effective_state_zeropad(task);

	if (type == ARM64_VEC_SME) {
		task->thread.svcr &= ~SVCR_ZA_MASK;
		kfree(task->thread.sme_state);
		task->thread.sme_state = sme_state;
	}

	return 0;

out_mem:
	kfree(sve_state);
	kfree(sme_state);
	return -ENOMEM;
}

int vec_set_vector_length(struct task_struct *task, enum vec_type type,
			  unsigned long vl, unsigned long flags)
{
	bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
	bool inherit = flags & PR_SVE_VL_INHERIT;

	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic code
	 * can work with. A flag may be assigned in the future to
	 * allow setting of larger vector lengths without confusing
	 * older software.
	 */
	if (vl > VL_ARCH_MAX)
		vl = VL_ARCH_MAX;

	vl = find_supported_vector_length(type, vl);

	if (!onexec && vl != task_get_vl(task, type)) {
		if (change_live_vector_length(task, type, vl))
			return -ENOMEM;
	}

	if (onexec || inherit)
		task_set_vl_onexec(task, type, vl);
	else
		/* Reset VL to system default on next exec: */
		task_set_vl_onexec(task, type, 0);

	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}

/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 * SVE and SME use the same bits for _ONEXEC and _INHERIT.
 *
 * flags are as for vec_set_vector_length().
 */
static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = task_get_vl_onexec(current, type);
	else
		ret = task_get_vl(current, type);

	if (test_thread_flag(vec_vl_inherit_flag(type)))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}

/* PR_SVE_SET_VL */
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SVE, flags);
}

/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SVE, 0);
}

#ifdef CONFIG_ARM64_SME
/* PR_SME_SET_VL */
int sme_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SME_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
	if (ret)
		return ret;

	return vec_prctl_status(ARM64_VEC_SME, flags);
}

/* PR_SME_GET_VL */
int sme_get_current_vl(void)
{
	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	return vec_prctl_status(ARM64_VEC_SME, 0);
}
#endif /* CONFIG_ARM64_SME */

/*
 * Probe the set of vector lengths this CPU supports for @info->type,
 * recording each supported VQ in @map.
 */
static void vec_probe_vqs(struct vl_info *info,
			  DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;

	bitmap_zero(map, SVE_VQ_MAX);

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_vl(info->type, vq - 1); /* self-syncing */

		switch (info->type) {
		case ARM64_VEC_SVE:
			vl = sve_get_vl();
			break;
		case ARM64_VEC_SME:
			vl = sme_get_vl();
			break;
		default:
			vl = 0;
			break;
		}

		/* Minimum VL identified? */
		if (sve_vq_from_vl(vl) > vq)
			break;

		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}

/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */
void __init vec_init_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	vec_probe_vqs(info, info->vq_map);
	bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}

/*
 * If we haven't committed to the set of supported VQs yet, filter out
 * those not supported by the current CPU.
 * This function is called during the bring-up of early secondary CPUs only.
 */
void vec_update_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);

	vec_probe_vqs(info, tmp_map);
	bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
	bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
		  SVE_VQ_MAX);
}

/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function
is called during the bring-up of late secondary CPUs only.
 */
int vec_verify_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	vec_probe_vqs(info, tmp_map);

	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
		pr_warn("%s: cpu%d: Required vector length(s) missing\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
		pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	return 0;
}

/*
 * Allocate the buffer used to save/restore the SVE state around EFI
 * runtime services calls, sized for the largest VL of any vector type.
 */
static void __init sve_efi_setup(void)
{
	int max_vl = 0;
	int i;

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
		max_vl = max(vl_info[i].max_vl, max_vl);

	/*
	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
	 * This is evidence of a crippled system and we are returning void,
	 * so no attempt is made to handle this situation here.
	 */
	if (!sve_vl_valid(max_vl))
		goto fail;

	efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
				GFP_KERNEL);
	if (!efi_sve_state)
		goto fail;

	return;

fail:
	panic("Cannot allocate memory for EFI SVE save/restore");
}

/* Enable SVE at EL1 on this CPU and reset ZCR_EL1 to known values. */
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
	isb();

	write_sysreg_s(0, SYS_ZCR_EL1);
}

void __init sve_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SVE];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;
	int max_bit;

	if (!system_supports_sve())
		return;

	/*
	 * The SVE architecture mandates support for 128-bit vectors,
	 * so sve_vq_map must have at least SVE_VQ_MIN set.
	 * If something went wrong, at least try to patch it up:
	 */
	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
		set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);

	max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
	info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));

	/*
	 * For the default VL, pick the maximum supported value <= 64.
	 * VL == 64 is guaranteed not to grow the signal frame.
	 */
	set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));

	bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
		      SVE_VQ_MAX);

	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		/* No non-virtualisable VLs found */
		info->max_virtualisable_vl = SVE_VQ_MAX;
	else if (WARN_ON(b == SVE_VQ_MAX - 1))
		/* No virtualisable VLs?  This is architecturally forbidden.
		 */
		info->max_virtualisable_vl = SVE_VQ_MIN;
	else /* b + 1 < SVE_VQ_MAX */
		info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));

	if (info->max_virtualisable_vl > info->max_vl)
		info->max_virtualisable_vl = info->max_vl;

	pr_info("%s: maximum available vector length %u bytes per vector\n",
		info->name, info->max_vl);
	pr_info("%s: default vector length %u bytes per vector\n",
		info->name, get_sve_default_vl());

	/* KVM decides whether to support mismatched systems. Just warn here: */
	if (sve_max_virtualisable_vl() < sve_max_vl())
		pr_warn("%s: unvirtualisable vector lengths present\n",
			info->name);

	sve_efi_setup();
}

/*
 * Called from the put_task_struct() path, which cannot get here
 * unless dead_task is really dead and not schedulable.
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	sve_free(dead_task);
	sme_free(dead_task);
}

#endif /* CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

/*
 * Ensure that task->thread.sme_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sme_state with new data.  The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability, the architecture
 * guarantees that when ZA is enabled it will be zeroed.
 */
void sme_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sme_state) {
		if (flush)
			memset(task->thread.sme_state, 0,
			       sme_state_size(task));
		return;
	}

	/* This could potentially be up to 64K.
 */
	task->thread.sme_state =
		kzalloc(sme_state_size(task), GFP_KERNEL);
}

/* Free task->thread.sme_state and mark it unallocated. */
static void sme_free(struct task_struct *task)
{
	kfree(task->thread.sme_state);
	task->thread.sme_state = NULL;
}

/* Per-CPU SME enablement: priority, kernel access, SMCR and TPIDR2 setup. */
void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* Set priority for all PEs to architecturally defined minimum */
	write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
		       SYS_SMPRI_EL1);

	/* Allow SME in kernel */
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
	isb();

	/* Ensure all bits in SMCR are set to known values */
	write_sysreg_s(0, SYS_SMCR_EL1);

	/* Allow EL0 to access TPIDR2 */
	write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
	isb();
}

void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* This must be enabled after SME */
	BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);

	/* Allow use of ZT0 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
		       SYS_SMCR_EL1);
}

void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* This must be enabled after SME */
	BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);

	/* Allow use of FA64 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
		       SYS_SMCR_EL1);
}

/*
 * Commit the probed SME vector lengths: record min/max VL and pick the
 * default VL for new tasks.  Called once at boot after CPU bring-up.
 */
void __init sme_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SME];
	int min_bit, max_bit;

	if (!system_supports_sme())
		return;

	min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);

	/*
	 * SME doesn't require any particular vector length be
	 * supported but it does require at least one.  We should have
	 * disabled the feature entirely while bringing up CPUs but
	 * let's double check here.  The bitmap is SVE_VQ_MAP sized for
	 * sharing with SVE.
	 */
	WARN_ON(min_bit >= SVE_VQ_MAX);

	info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));

	max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
	info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));

	WARN_ON(info->min_vl > info->max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 32
	 * (256 bits) if there is one since this is guaranteed not to
	 * grow the signal frame when in streaming mode, otherwise the
	 * minimum available VL will be used.
	 */
	set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));

	pr_info("SME: minimum available vector length %u bytes per vector\n",
		info->min_vl);
	pr_info("SME: maximum available vector length %u bytes per vector\n",
		info->max_vl);
	pr_info("SME: default vector length %u bytes per vector\n",
		get_sme_default_vl());
}

/* Reconstitute SMCR/SMPRI after a power-management suspend cycle. */
void sme_suspend_exit(void)
{
	u64 smcr = 0;

	if (!system_supports_sme())
		return;

	if (system_supports_fa64())
		smcr |= SMCR_ELx_FA64;
	if (system_supports_sme2())
		smcr |= SMCR_ELx_EZT0;

	write_sysreg_s(smcr, SYS_SMCR_EL1);
	write_sysreg_s(0, SYS_SMPRI_EL1);
}

#endif /* CONFIG_ARM64_SME */

static void sve_init_regs(void)
{
	/*
	 * Convert the FPSIMD state to SVE, zeroing all the state that
	 * is not shared with FPSIMD.
If (as is likely) the current
	 * state is live in the registers then do this there and
	 * update our metadata for the current task including
	 * disabling the trap, otherwise update our in-memory copy.
	 * We are guaranteed to not be in streaming mode, we can only
	 * take a SVE trap when not in streaming mode and we can't be
	 * in streaming mode when taking a SME trap.
	 */
	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sve_vl(current)) - 1;
		sve_set_vq(vq_minus_one);
		sve_flush_live(true, vq_minus_one);
		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_to_sve(current);
		current->thread.fp_type = FP_STATE_SVE;
		fpsimd_flush_task_state(current);
	}
}

/*
 * Trapped SVE access
 *
 * Storage is allocated for the full SVE state, the current FPSIMD
 * register contents are migrated across, and the access trap is
 * disabled.
 *
 * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SVE access trap for userspace during
 * ret_to_user, making an SVE access trap impossible in that case.
 */
void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SVE, the hardware could still trap: */
	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current, true);
	if (!current->thread.sve_state) {
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	if (test_and_set_thread_flag(TIF_SVE))
		WARN_ON(1); /* SVE access shouldn't have trapped */

	/*
	 * Even if the task can have used streaming mode we can only
	 * generate SVE access traps in normal SVE mode and
	 * transitioning out of streaming mode may discard any
	 * streaming mode state.  Always clear the high bits to avoid
	 * any potential errors tracking what is properly initialised.
	 */
	sve_init_regs();

	put_cpu_fpsimd_context();
}

/*
 * Trapped SME access
 *
 * Storage is allocated for the full SVE and SME state, the current
 * FPSIMD register contents are migrated to SVE if SVE is not already
 * active, and the access trap is disabled.
 *
 * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SME access trap for userspace during
 * ret_to_user, making an SME access trap impossible in that case.
 */
void do_sme_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SME, the hardware could still trap: */
	if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	/*
	 * If this not a trap due to SME being disabled then something
	 * is being used in the wrong mode, report as SIGILL.
	 */
	if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current, false);
	sme_alloc(current, true);
	if (!current->thread.sve_state || !current->thread.sme_state) {
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	/* With TIF_SME userspace shouldn't generate any traps */
	if (test_and_set_thread_flag(TIF_SME))
		WARN_ON(1);

	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sme_vl(current)) - 1;
		sme_set_vq(vq_minus_one);

		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_flush_task_state(current);
	}

	put_cpu_fpsimd_context();
}

/*
 * Trapped FP/ASIMD access.
 */
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use FPSIMD, the hardware could still trap: */
	if (!system_supports_fpsimd()) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	/*
	 * When FPSIMD is enabled, we should never take a trap unless something
	 * has gone very wrong.
	 */
	BUG();
}

/*
 * Raise a SIGFPE for the current process, mapping the trapped FP
 * exception bits in ESR to the closest si_code.
 */
void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
{
	unsigned int si_code = FPE_FLTUNK;

	if (esr & ESR_ELx_FP_EXC_TFV) {
		if (esr & FPEXC_IOF)
			si_code = FPE_FLTINV;
		else if (esr & FPEXC_DZF)
			si_code = FPE_FLTDIV;
		else if (esr & FPEXC_OFF)
			si_code = FPE_FLTOVF;
		else if (esr & FPEXC_UFF)
			si_code = FPE_FLTUND;
		else if (esr & FPEXC_IXF)
			si_code = FPE_FLTRES;
	}

	send_sig_fault(SIGFPE, si_code,
		       (void __user *)instruction_pointer(regs),
		       current);
}

static void fpsimd_load_kernel_state(struct task_struct *task)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	/*
	 * Elide the load if this CPU holds the most recent kernel mode
	 * FPSIMD context of the current task.
	 */
	if (last->st == &task->thread.kernel_fpsimd_state &&
	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
		return;

	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
}

/*
 * Save the kernel mode FPSIMD state of @task and record this CPU as the
 * owner of that state so a redundant reload can be elided later.
 */
static void fpsimd_save_kernel_state(struct task_struct *task)
{
	struct cpu_fp_state cpu_fp_state = {
		.st		= &task->thread.kernel_fpsimd_state,
		.to_save	= FP_STATE_FPSIMD,
	};

	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
	fpsimd_bind_state_to_cpu(&cpu_fp_state);

	task->thread.kernel_fpsimd_cpu = smp_processor_id();
}

/*
 * Invalidate any task's FPSIMD state that is present on this cpu.
 * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
 * before calling this function.
 */
static void fpsimd_flush_cpu_state(void)
{
	WARN_ON(!system_supports_fpsimd());
	__this_cpu_write(fpsimd_last_state.st, NULL);

	/*
	 * Leaving streaming mode
enabled will cause issues for any kernel
	 * NEON and leaving streaming mode or ZA enabled may increase power
	 * consumption.
	 */
	if (system_supports_sme())
		sme_smstop();

	set_thread_flag(TIF_FOREIGN_FPSTATE);
}

/*
 * Context-switch hook: save the outgoing task's FPSIMD state (user or
 * kernel mode) and arrange for the incoming task's state to be loaded,
 * either immediately (kernel mode) or lazily via TIF_FOREIGN_FPSTATE.
 */
void fpsimd_thread_switch(struct task_struct *next)
{
	bool wrong_task, wrong_cpu;

	if (!system_supports_fpsimd())
		return;

	WARN_ON_ONCE(!irqs_disabled());

	/* Save unsaved fpsimd state, if any: */
	if (test_thread_flag(TIF_KERNEL_FPSTATE))
		fpsimd_save_kernel_state(current);
	else
		fpsimd_save_user_state();

	if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
		fpsimd_flush_cpu_state();
		fpsimd_load_kernel_state(next);
	} else {
		/*
		 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
		 * state.  For kernel threads, FPSIMD registers are never
		 * loaded with user mode FPSIMD state and so wrong_task and
		 * wrong_cpu will always be true.
		 */
		wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
			     &next->thread.uw.fpsimd_state;
		wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();

		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
				       wrong_task || wrong_cpu);
	}
}

static void fpsimd_flush_thread_vl(enum vec_type type)
{
	int vl, supported_vl;

	/*
	 * Reset the task vector length as required.  This is where we
	 * ensure that all user tasks have a valid vector length
	 * configured: no kernel task can become a user task without
	 * an exec and hence a call to this function.  By the time the
	 * first call to this function is made, all early hardware
	 * probing is complete, so __sve_default_vl should be valid.
	 * If a bug causes this to go wrong, we make some noise and
	 * try to fudge thread.sve_vl to a safe value here.
	 */
	vl = task_get_vl_onexec(current, type);
	if (!vl)
		vl = get_default_vl(type);

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = vl_info[type].min_vl;

	supported_vl = find_supported_vector_length(type, vl);
	if (WARN_ON(supported_vl != vl))
		vl = supported_vl;

	task_set_vl(current, type, vl);

	/*
	 * If the task is not set to inherit, ensure that the vector
	 * length will be reset by a subsequent exec:
	 */
	if (!test_thread_flag(vec_vl_inherit_flag(type)))
		task_set_vl_onexec(current, type, 0);
}

/*
 * Reset current's FP/SIMD, SVE and SME state for exec: zero the FPSIMD
 * registers, drop SVE/SME storage and re-derive vector lengths.
 */
void fpsimd_flush_thread(void)
{
	void *sve_state = NULL;
	void *sme_state = NULL;

	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	fpsimd_flush_task_state(current);
	memset(&current->thread.uw.fpsimd_state, 0,
	       sizeof(current->thread.uw.fpsimd_state));

	if (system_supports_sve()) {
		clear_thread_flag(TIF_SVE);

		/* Defer kfree() while in atomic context */
		sve_state = current->thread.sve_state;
		current->thread.sve_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SVE);
	}

	if (system_supports_sme()) {
		clear_thread_flag(TIF_SME);

		/* Defer kfree() while in atomic context */
		sme_state = current->thread.sme_state;
		current->thread.sme_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SME);
		current->thread.svcr = 0;
	}

	if (system_supports_fpmr())
		current->thread.uw.fpmr = 0;

	current->thread.fp_type = FP_STATE_FPSIMD;

	put_cpu_fpsimd_context();
	kfree(sve_state);
	kfree(sme_state);
}

/*
 * Save the userland FPSIMD state of 'current' to memory, but only if the state
 * currently held in the registers does in fact belong to 'current'
 */
void fpsimd_preserve_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save_user_state();
	put_cpu_fpsimd_context();
}

/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 */
static void fpsimd_bind_task_to_cpu(void)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->sme_state = current->thread.sme_state;
	last->sve_vl = task_get_sve_vl(current);
	last->sme_vl = task_get_sme_vl(current);
	last->svcr = &current->thread.svcr;
	last->fpmr = &current->thread.uw.fpmr;
	last->fp_type = &current->thread.fp_type;
	last->to_save = FP_STATE_CURRENT;
	current->thread.fpsimd_cpu = smp_processor_id();

	/*
	 * Toggle SVE and SME trapping for userspace if needed, these
	 * are serialised by ret_to_user().
	 */
	if (system_supports_sme()) {
		if (test_thread_flag(TIF_SME))
			sme_user_enable();
		else
			sme_user_disable();
	}

	if (system_supports_sve()) {
		if (test_thread_flag(TIF_SVE))
			sve_user_enable();
		else
			sve_user_disable();
	}
}

void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!in_softirq() && !irqs_disabled());

	*last = *state;
}

/*
 * Load the userland FPSIMD state of 'current' from memory, but only if the
 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
 * state of 'current'.
This is called when we are preparing to return to
 * userspace to ensure that userspace sees a good register state.
 */
void fpsimd_restore_current_state(void)
{
	/*
	 * TIF_FOREIGN_FPSTATE is set on the init task and copied by
	 * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
	 * Thus user threads can have this set even when FP/SIMD hasn't been
	 * detected.
	 *
	 * When FP/SIMD is detected, begin_new_exec() will set
	 * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
	 * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
	 * switching tasks. We detect FP/SIMD before we exec the first user
	 * process, ensuring this has TIF_FOREIGN_FPSTATE set and
	 * do_notify_resume() will call fpsimd_restore_current_state() to
	 * install the user FP/SIMD context.
	 *
	 * When FP/SIMD is not detected, nothing else will clear or set
	 * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
	 * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
	 * looping forever calling fpsimd_restore_current_state().
	 */
	if (!system_supports_fpsimd()) {
		clear_thread_flag(TIF_FOREIGN_FPSTATE);
		return;
	}

	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	}

	put_cpu_fpsimd_context();
}

/*
 * Replace current's in-memory userland FPSIMD state with @state,
 * propagating it into the SVE view when that is the active format.
 */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	current->thread.uw.fpsimd_state = *state;
	if (current->thread.fp_type == FP_STATE_SVE)
		fpsimd_to_sve(current);
}

/*
 * Invalidate live CPU copies of task t's FPSIMD state
 *
 * This function may be called with preemption enabled.  The barrier()
 * ensures that the assignment to fpsimd_cpu is visible to any
 * preemption/softirq that could race with set_tsk_thread_flag(), so
 * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
 *
 * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
 * subsequent code.
 */
void fpsimd_flush_task_state(struct task_struct *t)
{
	t->thread.fpsimd_cpu = NR_CPUS;
	/*
	 * If we don't support fpsimd, bail out after we have
	 * reset the fpsimd_cpu for this task and clear the
	 * FPSTATE.
	 */
	if (!system_supports_fpsimd())
		return;
	barrier();
	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);

	barrier();
}

/* Save current's user FPSIMD state and invalidate all live copies of it. */
void fpsimd_save_and_flush_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save_user_state();
	fpsimd_flush_task_state(current);
	put_cpu_fpsimd_context();
}

/*
 * Save the FPSIMD state to memory and invalidate cpu view.
 * This function must be called with preemption disabled.
 */
void fpsimd_save_and_flush_cpu_state(void)
{
	unsigned long flags;

	if (!system_supports_fpsimd())
		return;
	WARN_ON(preemptible());
	local_irq_save(flags);
	fpsimd_save_user_state();
	fpsimd_flush_cpu_state();
	local_irq_restore(flags);
}

#ifdef CONFIG_KERNEL_MODE_NEON

/*
 * Kernel-side NEON support functions
 */

/*
 * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
 * context
 *
 * Must not be called unless may_use_simd() returns true.
 * Task context in the FPSIMD registers is saved back to memory as necessary.
 *
 * A matching call to kernel_neon_end() must be made before returning from the
 * calling context.
 *
 * The caller may freely use the FPSIMD registers until kernel_neon_end() is
 * called.
 */
void kernel_neon_begin(void)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	BUG_ON(!may_use_simd());

	get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
		fpsimd_save_kernel_state(current);
	} else {
		fpsimd_save_user_state();

		/*
		 * Set the thread flag so that the kernel mode FPSIMD state
		 * will be context switched along with the rest of the task
		 * state.
		 *
		 * On non-PREEMPT_RT, softirqs may interrupt task level kernel
		 * mode FPSIMD, but the task will not be preemptible so setting
		 * TIF_KERNEL_FPSTATE for those would be both wrong (as it
		 * would mark the task context FPSIMD state as requiring a
		 * context switch) and unnecessary.
		 *
		 * On PREEMPT_RT, softirqs are serviced from a separate thread,
		 * which is scheduled as usual, and this guarantees that these
		 * softirqs are not interrupting use of the FPSIMD in kernel
		 * mode in task context. So in this case, setting the flag here
		 * is always appropriate.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
			set_thread_flag(TIF_KERNEL_FPSTATE);
	}

	/* Invalidate any task state remaining in the fpsimd regs: */
	fpsimd_flush_cpu_state();

	put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);

/*
 * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
 *
 * Must be called from a context in which kernel_neon_begin() was previously
 * called, with no call to kernel_neon_end() in the meantime.
 *
 * The caller must not use the FPSIMD registers after this function is called,
 * unless kernel_neon_begin() is called again in the meantime.
 */
void kernel_neon_end(void)
{
	if (!system_supports_fpsimd())
		return;

	/*
	 * If we are returning from a nested use of kernel mode FPSIMD, restore
	 * the task context kernel mode FPSIMD state.
This can only happen when
	 * running in softirq context on non-PREEMPT_RT.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
	    test_thread_flag(TIF_KERNEL_FPSTATE))
		fpsimd_load_kernel_state(current);
	else
		clear_thread_flag(TIF_KERNEL_FPSTATE);
}
EXPORT_SYMBOL_GPL(kernel_neon_end);

#ifdef CONFIG_EFI

static struct user_fpsimd_state efi_fpsimd_state;
static bool efi_fpsimd_state_used;
static bool efi_sve_state_used;
static bool efi_sm_state;

/*
 * EFI runtime services support functions
 *
 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
 * This means that for EFI (and only for EFI), we have to assume that FPSIMD
 * is always used rather than being an optional accelerator.
 *
 * These functions provide the necessary support for ensuring FPSIMD
 * save/restore in the contexts from which EFI is used.
 *
 * Do not use them for any other purpose -- if tempted to do so, you are
 * either doing something wrong or you need to propose some refactoring.
 */

/*
 * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
 */
void __efi_fpsimd_begin(void)
{
	if (!system_supports_fpsimd())
		return;

	WARN_ON(preemptible());

	if (may_use_simd()) {
		kernel_neon_begin();
	} else {
		/*
		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
		 * preserving:
		 */
		if (system_supports_sve() && efi_sve_state != NULL) {
			bool ffr = true;
			u64 svcr;

			efi_sve_state_used = true;

			if (system_supports_sme()) {
				svcr = read_sysreg_s(SYS_SVCR);

				efi_sm_state = svcr & SVCR_SM_MASK;

				/*
				 * Unless we have FA64 FFR does not
				 * exist in streaming mode.
				 */
				if (!system_supports_fa64())
					ffr = !(svcr & SVCR_SM_MASK);
			}

			sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
				       &efi_fpsimd_state.fpsr, ffr);

			if (system_supports_sme())
				sysreg_clear_set_s(SYS_SVCR,
						   SVCR_SM_MASK, 0);

		} else {
			fpsimd_save_state(&efi_fpsimd_state);
		}

		efi_fpsimd_state_used = true;
	}
}

/*
 * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
 */
void __efi_fpsimd_end(void)
{
	if (!system_supports_fpsimd())
		return;

	if (!efi_fpsimd_state_used) {
		kernel_neon_end();
	} else {
		if (system_supports_sve() && efi_sve_state_used) {
			bool ffr = true;

			/*
			 * Restore streaming mode; EFI calls are
			 * normal function calls so should not return in
			 * streaming mode.
			 */
			if (system_supports_sme()) {
				if (efi_sm_state) {
					sysreg_clear_set_s(SYS_SVCR,
							   0,
							   SVCR_SM_MASK);

					/*
					 * Unless we have FA64 FFR does not
					 * exist in streaming mode.
					 */
					if (!system_supports_fa64())
						ffr = false;
				}
			}

			sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
				       &efi_fpsimd_state.fpsr, ffr);

			efi_sve_state_used = false;
		} else {
			fpsimd_load_state(&efi_fpsimd_state);
		}

		efi_fpsimd_state_used = false;
	}
}

#endif /* CONFIG_EFI */

#endif /* CONFIG_KERNEL_MODE_NEON */

#ifdef CONFIG_CPU_PM
/*
 * CPU power-management notifier: save and invalidate this CPU's FPSIMD
 * state before the CPU is powered down.
 */
static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
				  unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_PM_ENTER:
		fpsimd_save_and_flush_cpu_state();
		break;
	case CPU_PM_EXIT:
		break;
	case CPU_PM_ENTER_FAILED:
	default:
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

static struct notifier_block fpsimd_cpu_pm_notifier_block = {
	.notifier_call = fpsimd_cpu_pm_notifier,
};

static void __init fpsimd_pm_init(void)
{
	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}

#else
static inline void fpsimd_pm_init(void) { }
#endif /* CONFIG_CPU_PM */

#ifdef CONFIG_HOTPLUG_CPU
/* A dead CPU cannot hold anyone's FPSIMD state: forget its last owner. */
static int fpsimd_cpu_dead(unsigned int cpu)
{
	per_cpu(fpsimd_last_state.st, cpu) = NULL;
	return 0;
}

static inline void fpsimd_hotplug_init(void)
{
	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
				  NULL, fpsimd_cpu_dead);
}

#else
static inline void fpsimd_hotplug_init(void) { }
#endif

/* Enable FP/SIMD access at EL1 and EL0 on this CPU. */
void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
{
	unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
	write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
	isb();
}

/*
 * FP/SIMD support code initialisation.
 */
static int __init fpsimd_init(void)
{
	if (cpu_have_named_feature(FP)) {
		fpsimd_pm_init();
		fpsimd_hotplug_init();
	} else {
		pr_notice("Floating-point is not implemented\n");
	}

	if (!cpu_have_named_feature(ASIMD))
		pr_notice("Advanced SIMD is not implemented\n");

	sve_sysctl_init();
	sme_sysctl_init();

	return 0;
}
core_initcall(fpsimd_init);