// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <[email protected]>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPU's
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: "+a" (loops)
		:
	);
}

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		native_pause();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSC's are per-cpu we need to calculate
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
 * counts with TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
	 * enter any deep C-state and we use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}

/*
 * Call a vendor specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check for actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
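/*
 * Illustrative sketch, separate from the file above: how the fixed-point
 * scaling in __udelay()/__const_udelay() turns microseconds into delay loops,
 * written as plain C so the arithmetic is visible without the inline mull.
 * This is a standalone userspace demonstration; sketch_usecs_to_loops(),
 * EXAMPLE_HZ and EXAMPLE_LPJ are made-up names and values for illustration,
 * not kernel interfaces.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_HZ	1000ULL		/* assumed CONFIG_HZ value */
#define EXAMPLE_LPJ	4000000ULL	/* assumed loops_per_jiffy value */

static unsigned long sketch_usecs_to_loops(unsigned long usecs)
{
	/* 0x10c7 ~= 2^32 / 10^6, rounded up, as in __udelay() */
	uint64_t xloops = (uint64_t)usecs * 0x10c7;

	/*
	 * __const_udelay() multiplies (xloops * 4) by (lpj * HZ / 4) and keeps
	 * the high 32 bits of the product, i.e. (xloops * lpj * HZ) >> 32,
	 * then adds one loop so the delay never rounds down to a shorter wait.
	 */
	return (unsigned long)((xloops * EXAMPLE_LPJ * EXAMPLE_HZ) >> 32) + 1;
}

int main(void)
{
	/* lpj * HZ = 4e9 loops/sec here, so expect ~4000 loops per microsecond */
	printf("1 us  -> %lu loops\n", sketch_usecs_to_loops(1));
	printf("10 us -> %lu loops\n", sketch_usecs_to_loops(10));
	return 0;
}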