diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/msr.h.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/msr.h --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/msr.h.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/msr.h 2018-03-20 13:07:36.809600413 +0000 @@ -159,6 +159,36 @@ static __always_inline unsigned long lon return rdtsc(); } +/** + * rdtscp() - read the current TSC and (optionally) CPU number, with built-in + * serialization point replacing barrier - only available + * if static_cpu_has(X86_FEATURE_RDTSCP) . + * returns: The 64-bit Time Stamp Counter (TSC) value. + * Optionally, 'cpu_out' can be non-null, and on return it will contain + * the number (Intel CPU ID) of the CPU that the task is currently running on. + * As does EAX_EDX_RET, this uses the "open-coded asm" style to + * force the compiler + assembler to always use (eax, edx, ecx) registers, + * NOT whole (rax, rdx, rcx) on x86_64 , because only 32-bit + * variables are used - exactly the same code should be generated + * for this instruction on 32-bit as on 64-bit when this asm stanza is used. + * See: SDM , Vol #2, RDTSCP instruction. 
+ */ +static __always_inline u64 rdtscp(u32 *cpu_out) +{ + u32 tsc_lo, tsc_hi, tsc_cpu; + asm volatile + ( "rdtscp" + : "=a" (tsc_lo) + , "=d" (tsc_hi) + , "=c" (tsc_cpu) + ); // : eax, edx, ecx used - NOT rax, rdx, rcx + if ( unlikely(cpu_out != ((void*)0)) ) + *cpu_out = tsc_cpu; + return ((((u64)tsc_hi) << 32) | + (((u64)tsc_lo) & 0x0ffffffffULL ) + ); +} + /* Deprecated, keep it for a cycle for easier merging: */ #define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/vgtod.h.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/vgtod.h --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/vgtod.h.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/asm/vgtod.h 2018-03-20 13:11:53.298339835 +0000 @@ -20,7 +20,13 @@ struct vsyscall_gtod_data { u64 wall_time_snsec; u64 monotonic_time_snsec; time_t monotonic_time_sec; - + time_t monotonic_time_raw_sec; + u64 monotonic_time_raw_nsec; + u64 raw_xtime_nsec; + u32 raw_mult; + u32 raw_shift; + u32 has_rdtscp; + u32 tsc_khz; struct timezone sys_tz; struct timespec wall_time_coarse; struct timespec monotonic_time_coarse; diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/uapi/asm/vdso_linux_tsc_calibration.h.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/uapi/asm/vdso_linux_tsc_calibration.h --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/uapi/asm/vdso_linux_tsc_calibration.h.3.10.0-693.17.1 2018-03-20 13:07:36.809600413 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/include/uapi/asm/vdso_linux_tsc_calibration.h 2018-03-20 13:07:36.809600413 +0000 @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 
WITH Linux-syscall-note */ +#ifndef _ASM_X86_VDSO_TSC_CALIBRATION_H +#define _ASM_X86_VDSO_TSC_CALIBRATION_H +/* + * Programs that want to use rdtsc / rdtscp instructions + * from user-space can make use of the Linux kernel TSC calibration + * by calling : + * __vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *); + * ( one has to resolve this symbol as in + * tools/testing/selftests/vDSO/parse_vdso.c + * ) + * which fills in a structure + * with the following layout : + */ + +/** struct linux_tsc_calibration_s - + * mult: amount to multiply 64-bit TSC value by + * shift: the right shift to apply to (mult*TSC) yielding nanoseconds + * tsc_khz: the calibrated TSC frequency in KHz from which previous members calculated + */ +struct linux_tsc_calibration_s +{ + unsigned int tsc_khz; + unsigned int mult; + unsigned int shift; +}; + +/* To use: + * + * static unsigned + * (*linux_tsc_cal)(struct linux_tsc_calibration_s *linux_tsc_cal) = vdso_sym("LINUX_2.6", "__vdso_linux_tsc_calibration"); + * if( linux_tsc_cal == 0UL ) + * { fprintf(stderr,"the patch providing __vdso_linux_tsc_calibration is not applied to the kernel.\n"); + * return ERROR; + * } + * static struct linux_tsc_calibration_s clock_source={0}; + * if( (clock_source.mult==0) && ! (*linux_tsc_cal)(&clock_source) ) + * { fprintf(stderr,"TSC is not the system clocksource.\n"); return; } + * unsigned int tsc_lo, tsc_hi, tsc_cpu; + * asm volatile + * ( "rdtscp" : "=a" (tsc_lo), "=d" (tsc_hi), "=c" (tsc_cpu) ); + * unsigned long tsc = (((unsigned long)tsc_hi) << 32) | tsc_lo; + * unsigned long nanoseconds = + * (( clock_source . mult ) * tsc ) >> (clock_source . shift); + * + * nanoseconds is now TSC value converted to nanoseconds, + * according to Linux' clocksource calibration values. + * Incidentally, 'tsc_cpu' is the number of the CPU the task is running on. 
+ * + * But better results are obtained by applying this to the difference (delta) + * and adding this to some previous timespec value: + * static u64 previous_tsc=0, previous_nsec=0, previous_sec=0; + * u64 tsc = rdtscp(); + * u64 delta = tsc - previous_tsc; + * u64 nsec = ((delta * clock_source.mult) + previous_nsec ) + * >> clock_source.shift; + * ts->tv_sec = previous_sec + (nsec / NSEC_PER_SEC); + * ts->tv_nsec = nsec % NSEC_PER_SEC; + * previous_tsc = tsc; + * previous_sec = ts->tv_sec; + * previous_nsec = ts->tv_nsec << clock_source.shift; + * return ts; + * This is the approach taken by Linux kernel & in VDSO . + * + * Or, in user-space, with floating point, one could convert the rdtscp value directly to nanoseconds : + * u64 ns = lround( ((double)rdtscp()) / (((double)clock_source.tsc_khz) / 1e6) ); + * (ie. if tsc_khz is 3000000 , there are 3 tsc ticks per nanosecond, so divide tsc ticks by 3). + * + * There should actually be very little difference between the two values obtained (~ 0.02% ) + * by either method. 
+ */ + +#endif diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/kernel/vsyscall_64.c.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/kernel/vsyscall_64.c --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/kernel/vsyscall_64.c.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/kernel/vsyscall_64.c 2018-03-20 13:12:59.558006730 +0000 @@ -49,6 +49,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -56,6 +57,8 @@ DEFINE_VVAR(int, vgetcpu_mode); DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); +extern u32 tsc_khz; + enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; static int __init vsyscall_setup(char *str) @@ -117,6 +120,14 @@ void update_vsyscall(struct timekeeper * vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, tk->wall_to_monotonic); + vdata->raw_mult = tk->tkr_raw.clock->mult; + vdata->raw_shift = tk->tkr_raw.clock->shift; + vdata->monotonic_time_raw_sec = tk->raw_time.tv_sec; + vdata->monotonic_time_raw_nsec = tk->raw_time.tv_nsec; + vdata->raw_xtime_nsec = tk->tkr_raw.xtime_nsec; + vdata->has_rdtscp = static_cpu_has(X86_FEATURE_RDTSCP); + vdata->tsc_khz = tsc_khz; + write_seqcount_end(&vdata->seq); } diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vclock_gettime.c.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vclock_gettime.c --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vclock_gettime.c.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vclock_gettime.c 2018-03-20 13:38:08.959126619 +0000 @@ -24,10 +24,11 @@ #include #include #include +#include #define gtod (&VVAR(vsyscall_gtod_data)) -notrace 
static cycle_t vread_tsc(void) +static notrace cycle_t vread_tsc(void) { cycle_t ret = (cycle_t)rdtsc_ordered(); u64 last = VVAR(vsyscall_gtod_data).clock.cycle_last; @@ -47,6 +48,16 @@ notrace static cycle_t vread_tsc(void) return last; } +static notrace u64 vread_tsc_raw(void) +{ + u64 tsc = (gtod->has_rdtscp ? rdtscp((void*)0) : (u64)rdtsc_ordered()) + , last = gtod->clock.cycle_last; + if (likely(tsc >= last)) + return tsc; + asm volatile (""); + return last; +} + static notrace cycle_t vread_hpet(void) { return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); @@ -159,8 +170,36 @@ notrace static inline u64 vgetsns(int *m return v * gtod->clock.mult; } +notrace static inline u64 vgetsns_raw(int *mode) +{ + u64 v; + cycles_t cycles; + + if (*mode == VCLOCK_TSC) + cycles = vread_tsc_raw(); + else if (*mode == VCLOCK_HPET) + cycles = vread_hpet(); +#ifdef CONFIG_PARAVIRT_CLOCK + else if (*mode == VCLOCK_PVCLOCK) + cycles = vread_pvclock(mode); +#endif + else + return 0; + v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; + return v * gtod->raw_mult; +} + +#ifdef RETPOLINE +# define _NO_THUNK_RELOCS_()(indirect_branch("keep"),\ + function_return("keep")) +# define _RETPOLINE_FUNC_ATTR_ __attribute__(_NO_THUNK_RELOCS_()) +#else +# define _RETPOLINE_FUNC_ATTR_ +#endif + /* Code size doesn't matter (vdso is 4k anyway) and this is faster. 
*/ -notrace static int __always_inline do_realtime(struct timespec *ts) +notrace static inline _RETPOLINE_FUNC_ATTR_ +int do_realtime(struct timespec *ts) { unsigned long seq; u64 ns; @@ -180,7 +219,8 @@ notrace static int __always_inline do_re return mode; } -notrace static int do_monotonic(struct timespec *ts) +notrace static inline _RETPOLINE_FUNC_ATTR_ +int do_monotonic(struct timespec *ts) { unsigned long seq; u64 ns; @@ -200,7 +240,8 @@ notrace static int do_monotonic(struct t return mode; } -notrace static int do_realtime_coarse(struct timespec *ts) +notrace static inline _RETPOLINE_FUNC_ATTR_ +int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { @@ -211,7 +252,8 @@ notrace static int do_realtime_coarse(st return 0; } -notrace static int do_monotonic_coarse(struct timespec *ts) +notrace static inline _RETPOLINE_FUNC_ATTR_ +int do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { @@ -223,10 +265,31 @@ notrace static int do_monotonic_coarse(s return 0; } -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +notrace static inline _RETPOLINE_FUNC_ATTR_ +int do_monotonic_raw(struct timespec *ts) { - int ret = VCLOCK_NONE; + unsigned long seq; + u64 ns; + int mode; + do { + seq = read_seqcount_begin(>od->seq); + ts->tv_sec = gtod->monotonic_time_raw_sec; + ts->tv_nsec= gtod->monotonic_time_raw_nsec; + mode = gtod->clock.vclock_mode; + ns = gtod->raw_xtime_nsec; + ns += vgetsns_raw(&mode); + ns >>= gtod->raw_shift; + } while (unlikely(read_seqcount_retry(>od->seq, seq))); + + timespec_add_ns( ts, ns ); + return mode; +} + +notrace _RETPOLINE_FUNC_ATTR_ +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + int ret = VCLOCK_NONE; switch (clock) { case CLOCK_REALTIME: ret = do_realtime(ts); @@ -234,16 +297,22 @@ notrace int __vdso_clock_gettime(clockid case CLOCK_MONOTONIC: ret = do_monotonic(ts); break; + case CLOCK_MONOTONIC_RAW: + ret = do_monotonic_raw(ts); + break; case CLOCK_REALTIME_COARSE: return 
do_realtime_coarse(ts); case CLOCK_MONOTONIC_COARSE: return do_monotonic_coarse(ts); + default: + break; } - if (ret == VCLOCK_NONE) return vdso_fallback_gettime(clock, ts); return 0; } + +_RETPOLINE_FUNC_ATTR_ int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); @@ -286,3 +355,31 @@ notrace time_t __vdso_time(time_t *t) } int time(time_t *t) __attribute__((weak, alias("__vdso_time"))); + +extern unsigned +__vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *); + +notrace unsigned +__vdso_linux_tsc_calibration(struct linux_tsc_calibration_s *tsc_cal) +{ + unsigned long seq; + unsigned rval = 0; + do { + seq = read_seqcount_begin(>od->seq); + if ( gtod->clock.vclock_mode == VCLOCK_TSC ) + { + rval = 1; + if ( tsc_cal == ((void*)0UL)) + break ; + tsc_cal -> tsc_khz = gtod->tsc_khz; + tsc_cal -> mult = gtod->raw_mult; + tsc_cal -> shift = gtod->raw_shift; + } + } while (unlikely(read_seqcount_retry(>od->seq, seq))); + + return rval; +} + +unsigned linux_tsc_calibration(void) + __attribute((weak, alias("__vdso_linux_tsc_calibration"))); + diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdso.lds.S.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdso.lds.S --- kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdso.lds.S.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdso.lds.S 2018-03-20 13:07:36.810600409 +0000 @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + linux_tsc_calibration; + __vdso_linux_tsc_calibration; local: *; }; } diff -up kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdsox32.lds.S.3.10.0-693.17.1 kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdsox32.lds.S --- 
kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdsox32.lds.S.3.10.0-693.17.1 2018-02-23 17:18:23.000000000 +0000 +++ kernel-3.10.0-693.21.1.el7/linux-3.10.0-693.21.1.el7.jvd.x86_64/arch/x86/vdso/vdsox32.lds.S 2018-03-20 13:07:36.810600409 +0000 @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_linux_tsc_calibration; local: *; }; }