Commit a03fdb76 authored by Linus Torvalds

Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (34 commits)
  time: Prevent 32 bit overflow with set_normalized_timespec()
  clocksource: Delay clocksource down rating to late boot
  clocksource: clocksource_select must be called with mutex locked
  clocksource: Resolve cpu hotplug dead lock with TSC unstable, fix crash
  timers: Drop a function prototype
  clocksource: Resolve cpu hotplug dead lock with TSC unstable
  timer.c: Fix S/390 comments
  timekeeping: Fix invalid getboottime() value
  timekeeping: Fix up read_persistent_clock() breakage on sh
  timekeeping: Increase granularity of read_persistent_clock(), build fix
  time: Introduce CLOCK_REALTIME_COARSE
  x86: Do not unregister PIT clocksource on PIT oneshot setup/shutdown
  clocksource: Avoid clocksource watchdog circular locking dependency
  clocksource: Protect the watchdog rating changes with clocksource_mutex
  clocksource: Call clocksource_change_rating() outside of watchdog_lock
  timekeeping: Introduce read_boot_clock
  timekeeping: Increase granularity of read_persistent_clock()
  timekeeping: Update clocksource with stop_machine
  timekeeping: Add timekeeper read_clock helper functions
  timekeeping: Move NTP adjusted clock multiplier to struct timekeeper
  ...

Fix trivial conflict due to MIPS lemote -> loongson renaming.
parents 202c4675 12e09337
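
The bulk of the architecture churn below comes from one interface change: read_persistent_clock() now fills in a timespec instead of returning seconds, so the persistent clock can eventually report sub-second granularity. A minimal sketch of the new contract (the stub body is illustrative, not any particular platform's implementation):

#include <linux/time.h>

/*
 * New contract: fill *ts instead of returning seconds. A platform
 * without a usable persistent clock reports 0; most conversions below
 * still set tv_nsec = 0 because their RTCs only count whole seconds.
 */
void read_persistent_clock(struct timespec *ts)
{
        ts->tv_sec = 0;         /* seconds read from the RTC, if any */
        ts->tv_nsec = 0;        /* the interface now allows nanoseconds */
}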
@@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = {
  */
 unsigned long long sched_clock(void)
 {
-        unsigned long long ret;
-
-        ret = (unsigned long long)clocksource_32k.read(&clocksource_32k);
-        ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift;
-        return ret;
+        return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
+                                  clocksource_32k.mult, clocksource_32k.shift);
 }

 static int __init omap_init_clocksource_32k(void)
...
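
The clocksource_cyc2ns() helper used above is simply the (cycles * mult) >> shift conversion defined later in this merge in include/linux/clocksource.h. A standalone sketch with illustrative numbers (not taken from the patch): for this 32768 Hz counter, shift = 15 gives mult = clocksource_hz2mult(32768, 15) = 1000000000, so one cycle converts to (1 * 1000000000) >> 15 = 30517 ns, which matches the ~30.5 us period of a 32 kHz clock.

#include <stdint.h>

/* Mirrors the kernel helper; constants are the illustrative values above. */
static inline int64_t cyc2ns_sketch(uint64_t cycles)
{
        const uint32_t mult = 1000000000u;   /* clocksource_hz2mult(32768, 15) */
        const uint32_t shift = 15;

        return (int64_t)((cycles * mult) >> shift);
}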
@@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void)
         return mktime(year, mon, day, hour, min, sec);
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        return read_rtc_mmss();
+        ts->tv_sec = read_rtc_mmss();
+        ts->tv_nsec = 0;
 }

 int update_persistent_clock(struct timespec now)
...
@@ -18,7 +18,7 @@
 #include <asm/dec/ioasic.h>
 #include <asm/dec/machtype.h>

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
         unsigned int year, mon, day, hour, min, sec, real_year;
         unsigned long flags;
@@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void)

         year += real_year - 72 + 2000;

-        return mktime(year, mon, day, hour, min, sec);
+        ts->tv_sec = mktime(year, mon, day, hour, min, sec);
+        ts->tv_nsec = 0;
 }

 /*
...
@@ -135,7 +135,7 @@ static void rtc_end_op(void)
         lasat_ndelay(1000);
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
         unsigned long word;
         unsigned long flags;
@@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void)
         rtc_end_op();
         spin_unlock_irqrestore(&rtc_lock, flags);

-        return word;
+        ts->tv_sec = word;
+        ts->tv_nsec = 0;
 }

 int rtc_mips_set_mmss(unsigned long time)
...
@@ -92,10 +92,12 @@ static int rtctmp;
 int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
                     void *buffer, size_t *lenp, loff_t *ppos)
 {
+        struct timespec ts;
         int r;

         if (!write) {
-                rtctmp = read_persistent_clock();
+                read_persistent_clock(&ts);
+                rtctmp = ts.tv_sec;
                 /* check for time < 0 and set to 0 */
                 if (rtctmp < 0)
                         rtctmp = 0;
@@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table,
                      void *oldval, size_t *oldlenp,
                      void *newval, size_t newlen)
 {
+        struct timespec ts;
         int r;

-        rtctmp = read_persistent_clock();
+        read_persistent_clock(&ts);
+        rtctmp = ts.tv_sec;
         if (rtctmp < 0)
                 rtctmp = 0;
         r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
...
@@ -21,7 +21,8 @@ void __init plat_time_init(void)
         mips_hpt_frequency = cpu_clock_freq / 2;
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        return mc146818_get_cmos_time();
+        ts->tv_sec = mc146818_get_cmos_time();
+        ts->tv_nsec = 0;
 }
@@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void)
         return count;
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        return mc146818_get_cmos_time();
+        ts->tv_sec = mc146818_get_cmos_time();
+        ts->tv_nsec = 0;
 }

 static void __init plat_perf_setup(void)
...
@@ -70,7 +70,7 @@ void __init bus_error_init(void)
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
         unsigned int year, month, day, hour, min, sec;
         unsigned long flags;
@@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void)
         m48t37_base->control = 0x00;
         spin_unlock_irqrestore(&rtc_lock, flags);

-        return mktime(year, month, day, hour, min, sec);
+        ts->tv_sec = mktime(year, month, day, hour, min, sec);
+        ts->tv_nsec = 0;
 }

 int rtc_mips_set_time(unsigned long tim)
...
@@ -87,19 +87,26 @@ enum swarm_rtc_type {
 enum swarm_rtc_type swarm_rtc_type;

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
+        unsigned long sec;
+
         switch (swarm_rtc_type) {
         case RTC_XICOR:
-                return xicor_get_time();
+                sec = xicor_get_time();
+                break;

         case RTC_M4LT81:
-                return m41t81_get_time();
+                sec = m41t81_get_time();
+                break;

         case RTC_NONE:
         default:
-                return mktime(2000, 1, 1, 0, 0, 0);
+                sec = mktime(2000, 1, 1, 0, 0, 0);
+                break;
         }
+        ts->tv_sec = sec;
+        ts->tv_nsec = 0;
 }

 int rtc_mips_set_time(unsigned long sec)
...
@@ -182,7 +182,8 @@ void __init plat_time_init(void)
         setup_pit_timer();
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        return -1;
+        ts->tv_sec = -1;
+        ts->tv_nsec = 0;
 }
@@ -774,11 +774,12 @@ int update_persistent_clock(struct timespec now)
         return ppc_md.set_rtc_time(&tm);
 }

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
         struct rtc_time tm;
         static int first = 1;

+        ts->tv_nsec = 0;
         /* XXX this is a litle fragile but will work okay in the short term */
         if (first) {
                 first = 0;
@@ -786,14 +787,18 @@ unsigned long read_persistent_clock(void)
                         timezone_offset = ppc_md.time_init();

                 /* get_boot_time() isn't guaranteed to be safe to call late */
-                if (ppc_md.get_boot_time)
-                        return ppc_md.get_boot_time() -timezone_offset;
+                if (ppc_md.get_boot_time) {
+                        ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
+                        return;
+                }
+        }
+        if (!ppc_md.get_rtc_time) {
+                ts->tv_sec = 0;
+                return;
         }
-        if (!ppc_md.get_rtc_time)
-                return 0;
         ppc_md.get_rtc_time(&tm);
-        return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
-                      tm.tm_hour, tm.tm_min, tm.tm_sec);
+        ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
+                            tm.tm_hour, tm.tm_min, tm.tm_sec);
 }

 /* clocksource code */
...
@@ -184,12 +184,14 @@ static void timing_alert_interrupt(__u16 code)
 static void etr_reset(void);
 static void stp_reset(void);

-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        struct timespec ts;
+        tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts);
+}

-        tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts);
-        return ts.tv_sec;
+void read_boot_clock(struct timespec *ts)
+{
+        tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
 }

 static cycle_t read_tod_clock(struct clocksource *cs)
@@ -207,6 +209,10 @@ static struct clocksource clocksource_tod = {
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };

+struct clocksource * __init clocksource_default_clock(void)
+{
+        return &clocksource_tod;
+}
+
 void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 {
@@ -244,10 +250,6 @@ void update_vsyscall_tz(void)
  */
 void __init time_init(void)
 {
-        struct timespec ts;
-        unsigned long flags;
-        cycle_t now;
-
         /* Reset time synchronization interfaces. */
         etr_reset();
         stp_reset();
@@ -263,26 +265,6 @@ void __init time_init(void)
         if (clocksource_register(&clocksource_tod) != 0)
                 panic("Could not register TOD clock source");

-        /*
-         * The TOD clock is an accurate clock. The xtime should be
-         * initialized in a way that the difference between TOD and
-         * xtime is reasonably small. Too bad that timekeeping_init
-         * sets xtime.tv_nsec to zero. In addition the clock source
-         * change from the jiffies clock source to the TOD clock
-         * source add another error of up to 1/HZ second. The same
-         * function sets wall_to_monotonic to a value that is too
-         * small for /proc/uptime to be accurate.
-         * Reset xtime and wall_to_monotonic to sane values.
-         */
-        write_seqlock_irqsave(&xtime_lock, flags);
-        now = get_clock();
-        tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime);
-        clocksource_tod.cycle_last = now;
-        clocksource_tod.raw_time = xtime;
-        tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts);
-        set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec);
-        write_sequnlock_irqrestore(&xtime_lock, flags);
-
         /* Enable TOD clock interrupts on the boot cpu. */
         init_cpu_timer();
...
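
The clocksource_default_clock() added above overrides a generic __weak default, so s390 never has to start on the jiffies clocksource during early boot. A sketch of the override pattern; the generic fallback body shown here is my assumption of the usual form, not part of this diff:

#include <linux/clocksource.h>
#include <linux/init.h>

/* In generic code (one translation unit): a weak, overridable default. */
struct clocksource * __init __weak clocksource_default_clock(void)
{
        return &clocksource_jiffies;    /* assumed generic fallback */
}

/* In arch code (another translation unit): the strong definition wins,
 * as in the s390 hunk above returning &clocksource_tod. */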
@@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
 int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;

 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
-        struct timespec tv;
-        rtc_sh_get_time(&tv);
-        return tv.tv_sec;
+        rtc_sh_get_time(ts);
 }

 int update_persistent_clock(struct timespec now)
...
@@ -21,6 +21,7 @@ struct vsyscall_gtod_data {
                 u32     shift;
         } clock;
         struct timespec wall_to_monotonic;
+        struct timespec wall_time_coarse;
 };
 extern struct vsyscall_gtod_data __vsyscall_gtod_data
 __section_vsyscall_gtod_data;
...
@@ -19,12 +19,6 @@
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);

-#ifdef CONFIG_X86_32
-static void pit_disable_clocksource(void);
-#else
-static inline void pit_disable_clocksource(void) { }
-#endif
-
 /*
  * HPET replaces the PIT, when enabled. So we need to know, which of
  * the two timers is used
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode,
                         outb_pit(0, PIT_CH0);
                         outb_pit(0, PIT_CH0);
                 }
-                pit_disable_clocksource();
                 break;

         case CLOCK_EVT_MODE_ONESHOT:
                 /* One shot setup */
-                pit_disable_clocksource();
                 outb_pit(0x38, PIT_MODE);
                 break;
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = {
         .shift          = 20,
 };

-static void pit_disable_clocksource(void)
-{
-        /*
-         * Use mult to check whether it is registered or not
-         */
-        if (pit_cs.mult) {
-                clocksource_unregister(&pit_cs);
-                pit_cs.mult = 0;
-        }
-}
-
 static int __init init_pit_clocksource(void)
 {
         /*
...
@@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 }

 /* not static: needed by APM */
-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
         unsigned long retval, flags;
@@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void)
         retval = get_wallclock();
         spin_unlock_irqrestore(&rtc_lock, flags);

-        return retval;
+        ts->tv_sec = retval;
+        ts->tv_nsec = 0;
 }

 int update_persistent_clock(struct timespec now)
...
@@ -744,10 +744,16 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 }
 #endif

+static void resume_tsc(void)
+{
+        clocksource_tsc.cycle_last = 0;
+}
+
 static struct clocksource clocksource_tsc = {
         .name                   = "tsc",
         .rating                 = 300,
         .read                   = read_tsc,
+        .resume                 = resume_tsc,
         .mask                   = CLOCKSOURCE_MASK(64),
         .shift                  = 22,
         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
@@ -761,12 +767,14 @@ void mark_tsc_unstable(char *reason)
 {
         if (!tsc_unstable) {
                 tsc_unstable = 1;
-                printk("Marking TSC unstable due to %s\n", reason);
+                printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
                 /* Change only the rating, when not registered */
                 if (clocksource_tsc.mult)
-                        clocksource_change_rating(&clocksource_tsc, 0);
-                else
+                        clocksource_mark_unstable(&clocksource_tsc);
+                else {
+                        clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
                         clocksource_tsc.rating = 0;
+                }
         }
 }
...
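
resume_tsc() clears cycle_last because the TSC can restart from a small value across suspend/resume; with the stale pre-suspend reading still recorded, the timekeeping core's masked delta would wrap into an enormous bogus interval. A standalone user-space illustration of that arithmetic (a sketch, not the kernel's resume path):

#include <stdint.h>
#include <stdio.h>

/* Same idiom the timekeeping core uses: delta = (now - last) & mask. */
static uint64_t masked_delta(uint64_t now, uint64_t last, uint64_t mask)
{
        return (now - last) & mask;
}

int main(void)
{
        const uint64_t mask = UINT64_MAX;       /* CLOCKSOURCE_MASK(64) */

        /* Counter restarted near 0, old cycle_last still huge: the
         * unsigned subtraction wraps to a delta of almost 2^64. */
        printf("%llu\n", (unsigned long long)
               masked_delta(1000, 3000000000000ULL, mask));
        return 0;
}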
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
         vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
         vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
         vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+        vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();

         write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
...
@@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts)
         return 0;
 }

+notrace static noinline int do_realtime_coarse(struct timespec *ts)
+{
+        unsigned long seq;
+        do {
+                seq = read_seqbegin(&gtod->lock);
+                ts->tv_sec = gtod->wall_time_coarse.tv_sec;
+                ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
+        } while (unlikely(read_seqretry(&gtod->lock, seq)));
+        return 0;
+}
+
+notrace static noinline int do_monotonic_coarse(struct timespec *ts)
+{
+        unsigned long seq, ns, secs;
+        do {
+                seq = read_seqbegin(&gtod->lock);
+                secs = gtod->wall_time_coarse.tv_sec;
+                ns = gtod->wall_time_coarse.tv_nsec;
+                secs += gtod->wall_to_monotonic.tv_sec;
+                ns += gtod->wall_to_monotonic.tv_nsec;
+        } while (unlikely(read_seqretry(&gtod->lock, seq)));
+        vset_normalized_timespec(ts, secs, ns);
+        return 0;
+}
+
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-        if (likely(gtod->sysctl_enabled && gtod->clock.vread))
+        if (likely(gtod->sysctl_enabled))
                 switch (clock) {
                 case CLOCK_REALTIME:
-                        return do_realtime(ts);
+                        if (likely(gtod->clock.vread))
+                                return do_realtime(ts);
+                        break;
                 case CLOCK_MONOTONIC:
-                        return do_monotonic(ts);
+                        if (likely(gtod->clock.vread))
+                                return do_monotonic(ts);
+                        break;
+                case CLOCK_REALTIME_COARSE:
+                        return do_realtime_coarse(ts);
+                case CLOCK_MONOTONIC_COARSE:
+                        return do_monotonic_coarse(ts);
                 }
         return vdso_fallback_gettime(clock, ts);
 }
...
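
With this vDSO dispatch, the two coarse clocks never touch the hardware clocksource at all; they just copy the tick-updated snapshot under the seqlock. A user-space sketch of the new interface (assumes a libc that exposes the new clock ids; on older headers the raw values are 5 and 6, per the include/linux/time.h hunk below):

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        if (clock_gettime(CLOCK_REALTIME_COARSE, &ts) == 0)
                printf("coarse realtime: %ld.%09ld\n",
                       (long)ts.tv_sec, ts.tv_nsec);
        return 0;       /* older glibc may need linking with -lrt */
}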
@@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = {
 void __init time_init(void)
 {
-        xtime.tv_nsec = 0;
-        xtime.tv_sec = read_persistent_clock();
-
+        /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. */
+        read_persistent_clock(&xtime);
         set_normalized_timespec(&wall_to_monotonic,
                 -xtime.tv_sec, -xtime.tv_nsec);
...
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/cache.h>
 #include <linux/timer.h>
+#include <linux/init.h>
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -148,14 +149,11 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @disable:            optional function to disable the clocksource
  * @mask:               bitmask for two's complement
  *                      subtraction of non 64 bit counters
- * @mult:               cycle to nanosecond multiplier (adjusted by NTP)
- * @mult_orig:          cycle to nanosecond multiplier (unadjusted by NTP)
+ * @mult:               cycle to nanosecond multiplier
  * @shift:              cycle to nanosecond divisor (power of two)
  * @flags:              flags describing special properties
  * @vread:              vsyscall based read
  * @resume:             resume function for the clocksource, if necessary
- * @cycle_interval:     Used internally by timekeeping core, please ignore.
- * @xtime_interval:     Used internally by timekeeping core, please ignore.
  */
 struct clocksource {
         /*
@@ -169,7 +167,6 @@ struct clocksource {
         void (*disable)(struct clocksource *cs);
         cycle_t mask;
         u32 mult;
-        u32 mult_orig;
         u32 shift;
         unsigned long flags;
         cycle_t (*vread)(void);
@@ -181,19 +178,12 @@ struct clocksource {
 #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0)
 #endif

-        /* timekeeping specific data, ignore */
-        cycle_t cycle_interval;
-        u64 xtime_interval;
-        u32 raw_interval;
         /*
          * Second part is written at each timer interrupt
          * Keep it in a different cache line to dirty no
          * more than one cache line.
          */
         cycle_t cycle_last ____cacheline_aligned_in_smp;
-        u64 xtime_nsec;
-        s64 error;
-        struct timespec raw_time;

 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
         /* Watchdog related data, used by the framework */
@@ -202,8 +192,6 @@ struct clocksource {
 #endif
 };

-extern struct clocksource *clock;       /* current clocksource */
-
 /*
  * Clock source flags bits::
  */
@@ -212,6 +200,7 @@ extern struct clocksource *clock;       /* current clocksource */
 #define CLOCK_SOURCE_WATCHDOG                   0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES             0x20
+#define CLOCK_SOURCE_UNSTABLE                   0x40

 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
@@ -268,108 +257,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
 }

 /**
- * clocksource_read: - Access the clocksource's current cycle value
- * @cs:         pointer to clocksource being read
- *
- * Uses the clocksource to return the current cycle_t value
- */
-static inline cycle_t clocksource_read(struct clocksource *cs)
-{
-        return cs->read(cs);
-}
-
-/**
- * clocksource_enable: - enable clocksource
- * @cs:         pointer to clocksource
- *
- * Enables the specified clocksource. The clocksource callback
- * function should start up the hardware and setup mult and field
- * members of struct clocksource to reflect hardware capabilities.
- */
-static inline int clocksource_enable(struct clocksource *cs)
-{
-        int ret = 0;
-
-        if (cs->enable)
-                ret = cs->enable(cs);
-
-        /*
-         * The frequency may have changed while the clocksource
-         * was disabled. If so the code in ->enable() must update
-         * the mult value to reflect the new frequency. Make sure
-         * mult_orig follows this change.
-         */
-        cs->mult_orig = cs->mult;
-        return ret;
-}
-
-/**
- * clocksource_disable: - disable clocksource
- * @cs:         pointer to clocksource
- *
- * Disables the specified clocksource. The clocksource callback
- * function should power down the now unused hardware block to
- * save power.
- */
-static inline void clocksource_disable(struct clocksource *cs)
-{
-        /*
-         * Save mult_orig in mult so clocksource_enable() can
-         * restore the value regardless if ->enable() updates
-         * the value of mult or not.
-         */
-        cs->mult = cs->mult_orig;
-        if (cs->disable)
-                cs->disable(cs);
-}
-
-/**
- * cyc2ns - converts clocksource cycles to nanoseconds
- * @cs:         Pointer to clocksource
- * @cycles:     Cycles
+ * clocksource_cyc2ns - converts clocksource cycles to nanoseconds
  *
- * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds.
+ * Converts cycles to nanoseconds, using the given mult and shift.
  *
  * XXX - This could use some mult_lxl_ll() asm optimization
  */
-static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
-{
-        u64 ret = (u64)cycles;
-        ret = (ret * cs->mult) >> cs->shift;
-        return ret;
-}
-
-/**
- * clocksource_calculate_interval - Calculates a clocksource interval struct
- *
- * @c:          Pointer to clocksource.
- * @length_nsec: Desired interval length in nanoseconds.
- *
- * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
- * pair and interval request.
- *
- * Unless you're the timekeeping code, you should not be using this!
- */
-static inline void clocksource_calculate_interval(struct clocksource *c,
-                                                  unsigned long length_nsec)
+static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 {
-        u64 tmp;
-
-        /* Do the ns -> cycle conversion first, using original mult */
-        tmp = length_nsec;
-        tmp <<= c->shift;
-        tmp += c->mult_orig/2;
-        do_div(tmp, c->mult_orig);
-
-        c->cycle_interval = (cycle_t)tmp;
-        if (c->cycle_interval == 0)
-                c->cycle_interval = 1;
-
-        /* Go back from cycles -> shifted ns, this time use ntp adjused mult */
-        c->xtime_interval = (u64)c->cycle_interval * c->mult;
-        c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
+        return ((u64) cycles * mult) >> shift;
 }
@@ -380,6 +276,8 @@ extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_resume(void);
+extern struct clocksource * __init __weak clocksource_default_clock(void);
+extern void clocksource_mark_unstable(struct clocksource *cs);

 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
@@ -394,4 +292,6 @@ static inline void update_vsyscall_tz(void)
 }
 #endif

+extern void timekeeping_notify(struct clocksource *clock);
+
 #endif /* _LINUX_CLOCKSOURCE_H */
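
With mult_orig and the timekeeping bookkeeping fields gone, a clocksource driver now only describes its hardware; the NTP-adjusted multiplier lives in struct timekeeper (per the commit list above). A minimal driver sketch against the slimmed-down structure; the device name, MMIO address, and 1 MHz frequency are made up for illustration:

#include <linux/clocksource.h>
#include <linux/init.h>
#include <asm/io.h>

#define EXAMPLE_COUNTER_REG ((void __iomem *)0xfeed0000) /* hypothetical */

static cycle_t example_read(struct clocksource *cs)
{
        return (cycle_t)readl(EXAMPLE_COUNTER_REG);
}

static struct clocksource example_cs = {
        .name   = "example",
        .rating = 200,
        .read   = example_read,
        .mask   = CLOCKSOURCE_MASK(32),
        .shift  = 20,
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init example_cs_init(void)
{
        /* Only mult is set now; there is no mult_orig to mirror it. */
        example_cs.mult = clocksource_hz2mult(1000000, example_cs.shift);
        return clocksource_register(&example_cs);
}
device_initcall(example_cs_init);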
@@ -91,7 +91,6 @@ enum hrtimer_restart {
  * @function:   timer expiry callback function
  * @base:       pointer to the timer base (per cpu and per clock)
  * @state:      state information (See bit values above)
- * @cb_entry:   list head to enqueue an expired timer into the callback list
  * @start_site: timer statistics field to store the site where the timer
  *              was started
  * @start_comm: timer statistics field to store the name of the process which
@@ -108,7 +107,6 @@ struct hrtimer {
         enum hrtimer_restart            (*function)(struct hrtimer *);
         struct hrtimer_clock_base       *base;
         unsigned long                   state;
-        struct list_head                cb_entry;
 #ifdef CONFIG_TIMER_STATS
         int                             start_pid;
         void                            *start_site;
...
@@ -75,7 +75,7 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
                             const unsigned int day, const unsigned int hour,
                             const unsigned int min, const unsigned int sec);

-extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
+extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec);
 extern struct timespec timespec_add_safe(const struct timespec lhs,
                                          const struct timespec rhs);
@@ -101,7 +101,8 @@ extern struct timespec xtime;
 extern struct timespec wall_to_monotonic;
 extern seqlock_t xtime_lock;

-extern unsigned long read_persistent_clock(void);
+extern void read_persistent_clock(struct timespec *ts);
+extern void read_boot_clock(struct timespec *ts);
 extern int update_persistent_clock(struct timespec now);
 extern int no_sync_cmos_clock __read_mostly;
 void timekeeping_init(void);
@@ -109,6 +110,8 @@ extern int timekeeping_suspended;

 unsigned long get_seconds(void);
 struct timespec current_kernel_time(void);
+struct timespec __current_kernel_time(void); /* does not hold xtime_lock */
+struct timespec get_monotonic_coarse(void);

 #define CURRENT_TIME            (current_kernel_time())
 #define CURRENT_TIME_SEC        ((struct timespec) { get_seconds(), 0 })
@@ -147,6 +150,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
+extern void timekeeping_leap_insert(int leapsecond);

 struct tms;
 extern void do_sys_times(struct tms *);
@@ -241,6 +245,8 @@ struct itimerval {
 #define CLOCK_PROCESS_CPUTIME_ID        2
 #define CLOCK_THREAD_CPUTIME_ID         3
 #define CLOCK_MONOTONIC_RAW             4
+#define CLOCK_REALTIME_COARSE           5
+#define CLOCK_MONOTONIC_COARSE          6

 /*
  * The IDs of various hardware clocks:
...
@@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
  */
 #define NEXT_TIMER_MAX_DELTA    ((1UL << 30) - 1)

-/*
- * Return when the next timer-wheel timeout occurs (in absolute jiffies),
- * locks the timer base:
- */
-extern unsigned long next_timer_interrupt(void);
-
 /*
  * Return when the next timer-wheel timeout occurs (in absolute jiffies),
  * locks the timer base and does the comparison against the given
...
@@ -48,37 +48,6 @@

 #include <asm/uaccess.h>

-/**
- * ktime_get - get the monotonic time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get(void)
-{
-        struct timespec now;
-
-        ktime_get_ts(&now);
-
-        return timespec_to_ktime(now);
-}
-EXPORT_SYMBOL_GPL(ktime_get);
-
-/**
- * ktime_get_real - get the real (wall-) time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get_real(void)
-{
-        struct timespec now;
-
-        getnstimeofday(&now);
-
-        return timespec_to_ktime(now);
-}
-EXPORT_SYMBOL_GPL(ktime_get_real);
-
 /*
  * The timer bases:
  *
@@ -106,31 +75,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
         }
 };

-/**
- * ktime_get_ts - get the monotonic clock in timespec format
- * @ts:         pointer to timespec variable
- *
- * The function calculates the monotonic clock from the realtime
- * clock and the wall_to_monotonic offset and stores the result
- * in normalized timespec format in the variable pointed to by @ts.
- */
-void ktime_get_ts(struct timespec *ts)
-{
-        struct timespec tomono;
-        unsigned long seq;
-
-        do {
-                seq = read_seqbegin(&xtime_lock);
-                getnstimeofday(ts);
-                tomono = wall_to_monotonic;
-
-        } while (read_seqretry(&xtime_lock, seq));
-
-        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
-                                ts->tv_nsec + tomono.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(ktime_get_ts);
-
 /*
  * Get the coarse grained time at the softirq based on xtime and
  * wall_to_monotonic.
@@ -1155,7 +1099,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
                 clock_id = CLOCK_MONOTONIC;

         timer->base = &cpu_base->clock_base[clock_id];
-        INIT_LIST_HEAD(&timer->cb_entry);
         hrtimer_init_timer_hres(timer);

 #ifdef CONFIG_TIMER_STATS
...
@@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
         return 0;
 }

+static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
+{
+        *tp = current_kernel_time();
+        return 0;
+}
+
+static int posix_get_monotonic_coarse(clockid_t which_clock,
+                                                struct timespec *tp)
+{
+        *tp = get_monotonic_coarse();
+        return 0;
+}
+
+int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+{
+        *tp = ktime_to_timespec(KTIME_LOW_RES);
+        return 0;
+}
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -262,10 +281,26 @@ static __init int init_posix_timers(void)
                 .timer_create = no_timer_create,
                 .nsleep = no_nsleep,
         };
+        struct k_clock clock_realtime_coarse = {
+                .clock_getres = posix_get_coarse_res,
+                .clock_get = posix_get_realtime_coarse,
+                .clock_set = do_posix_clock_nosettime,
+                .timer_create = no_timer_create,
+                .nsleep = no_nsleep,
+        };
+        struct k_clock clock_monotonic_coarse = {
+                .clock_getres = posix_get_coarse_res,
+                .clock_get = posix_get_monotonic_coarse,
+                .clock_set = do_posix_clock_nosettime,
+                .timer_create = no_timer_create,
+                .nsleep = no_nsleep,
+        };

         register_posix_clock(CLOCK_REALTIME, &clock_realtime);
         register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
         register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+        register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+        register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);

         posix_timers_cache = kmem_cache_create("posix_timers_cache",
                                         sizeof (struct k_itimer), 0, SLAB_PANIC,
...
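
posix_get_coarse_res() reports KTIME_LOW_RES for both new clocks, i.e. roughly one tick (about 10^9/HZ nanoseconds, where HZ is a kernel build option). A quick user-space check of that trade-off between speed and resolution:

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec res;

        clock_getres(CLOCK_MONOTONIC_COARSE, &res);
        /* e.g. 4000000 ns on an HZ=250 kernel, 1000000 ns at HZ=1000 */
        printf("coarse resolution: %ld ns\n", res.tv_nsec);
        return 0;
}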
@@ -370,13 +370,20 @@ EXPORT_SYMBOL(mktime);
  *      0 <= tv_nsec < NSEC_PER_SEC
  * For negative values only the tv_sec field is negative !
  */
-void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
+void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
 {
         while (nsec >= NSEC_PER_SEC) {
+                /*
+                 * The following asm() prevents the compiler from
+                 * optimising this loop into a modulo operation. See
+                 * also __iter_div_u64_rem() in include/linux/time.h
+                 */
+                asm("" : "+rm"(nsec));
                 nsec -= NSEC_PER_SEC;
                 ++sec;
         }
         while (nsec < 0) {
+                asm("" : "+rm"(nsec));
                 nsec += NSEC_PER_SEC;
                 --sec;
         }
...
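
The empty asm() is there because, with nsec widened to s64, gcc can otherwise collapse these loops into a 64-bit division and modulo, which on 32-bit targets goes through slow library helpers; the common case is at most one iteration, so the loop wins. For reference, the semantics being preserved are plain normalization; a standalone restatement (not the kernel's implementation):

#include <stdint.h>

#define NSEC_PER_SEC 1000000000LL

/* Fold nsec into [0, NSEC_PER_SEC) while adjusting sec to match. */
static void normalize_timespec(int64_t *sec, int64_t *nsec)
{
        *sec += *nsec / NSEC_PER_SEC;
        *nsec %= NSEC_PER_SEC;          /* C99 '%' truncates toward zero */
        if (*nsec < 0) {
                *nsec += NSEC_PER_SEC;
                *sec -= 1;
        }
}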
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
* *
* TODO WishList: * TODO WishList:
* o Allow clocksource drivers to be unregistered * o Allow clocksource drivers to be unregistered
* o get rid of clocksource_jiffies extern
*/ */
#include <linux/clocksource.h> #include <linux/clocksource.h>
...@@ -30,6 +29,7 @@ ...@@ -30,6 +29,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h> #include <linux/tick.h>
#include <linux/kthread.h>
void timecounter_init(struct timecounter *tc, void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc, const struct cyclecounter *cc,
...@@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc, ...@@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc,
} }
EXPORT_SYMBOL(timecounter_cyc2time); EXPORT_SYMBOL(timecounter_cyc2time);
/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
/*[Clocksource internal variables]--------- /*[Clocksource internal variables]---------
* curr_clocksource: * curr_clocksource:
* currently selected clocksource. Initialized to clocksource_jiffies. * currently selected clocksource.
* next_clocksource:
* pending next selected clocksource.
* clocksource_list: * clocksource_list:
* linked list with the registered clocksources * linked list with the registered clocksources
* clocksource_lock: * clocksource_mutex:
* protects manipulations to curr_clocksource and next_clocksource * protects manipulations to curr_clocksource and the clocksource_list
* and the clocksource_list
* override_name: * override_name:
* Name of the user-specified clocksource. * Name of the user-specified clocksource.
*/ */
static struct clocksource *curr_clocksource = &clocksource_jiffies; static struct clocksource *curr_clocksource;
static struct clocksource *next_clocksource;
static struct clocksource *clocksource_override;
static LIST_HEAD(clocksource_list); static LIST_HEAD(clocksource_list);
static DEFINE_SPINLOCK(clocksource_lock); static DEFINE_MUTEX(clocksource_mutex);
static char override_name[32]; static char override_name[32];
static int finished_booting; static int finished_booting;
/* clocksource_done_booting - Called near the end of core bootup
*
* Hack to avoid lots of clocksource churn at boot time.
* We use fs_initcall because we want this to start before
* device_initcall but after subsys_initcall.
*/
static int __init clocksource_done_booting(void)
{
finished_booting = 1;
return 0;
}
fs_initcall(clocksource_done_booting);
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static LIST_HEAD(watchdog_list); static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog; static struct clocksource *watchdog;
static struct timer_list watchdog_timer; static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock); static DEFINE_SPINLOCK(watchdog_lock);
static cycle_t watchdog_last; static cycle_t watchdog_last;
static unsigned long watchdog_resumed; static int watchdog_running;
static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);
/* /*
* Interval: 0.5sec Threshold: 0.0625s * Interval: 0.5sec Threshold: 0.0625s
...@@ -158,135 +143,249 @@ static unsigned long watchdog_resumed; ...@@ -158,135 +143,249 @@ static unsigned long watchdog_resumed;
#define WATCHDOG_INTERVAL (HZ >> 1) #define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
static void clocksource_ratewd(struct clocksource *cs, int64_t delta) static void clocksource_watchdog_work(struct work_struct *work)
{ {
if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) /*
return; * If kthread_run fails the next watchdog scan over the
* watchdog_list will find the unstable clock again.
*/
kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}
static void __clocksource_unstable(struct clocksource *cs)
{
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
cs->flags |= CLOCK_SOURCE_UNSTABLE;
if (finished_booting)
schedule_work(&watchdog_work);
}
static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta); cs->name, delta);
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); __clocksource_unstable(cs);
clocksource_change_rating(cs, 0); }
list_del(&cs->wd_list);
/**
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
*
* This function is called instead of clocksource_change_rating from
* cpu hotplug code to avoid a deadlock between the clocksource mutex
* and the cpu hotplug mutex. It defers the update of the clocksource
* to the watchdog thread.
*/
void clocksource_mark_unstable(struct clocksource *cs)
{
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
if (list_empty(&cs->wd_list))
list_add(&cs->wd_list, &watchdog_list);
__clocksource_unstable(cs);
}
spin_unlock_irqrestore(&watchdog_lock, flags);
} }
static void clocksource_watchdog(unsigned long data) static void clocksource_watchdog(unsigned long data)
{ {
struct clocksource *cs, *tmp; struct clocksource *cs;
cycle_t csnow, wdnow; cycle_t csnow, wdnow;
int64_t wd_nsec, cs_nsec; int64_t wd_nsec, cs_nsec;
int resumed; int next_cpu;
spin_lock(&watchdog_lock); spin_lock(&watchdog_lock);
if (!watchdog_running)
resumed = test_and_clear_bit(0, &watchdog_resumed); goto out;
wdnow = watchdog->read(watchdog); wdnow = watchdog->read(watchdog);
wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
watchdog->mult, watchdog->shift);
watchdog_last = wdnow; watchdog_last = wdnow;
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { list_for_each_entry(cs, &watchdog_list, wd_list) {
csnow = cs->read(cs);
if (unlikely(resumed)) { /* Clocksource already marked unstable? */
cs->wd_last = csnow; if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
if (finished_booting)
schedule_work(&watchdog_work);
continue; continue;
} }
/* Initialized ? */ csnow = cs->read(cs);
/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* We just marked the clocksource as
* highres-capable, notify the rest of the
* system as well so that we transition
* into high-res mode:
*/
tick_clock_notify();
}
cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = csnow; cs->wd_last = csnow;
} else { continue;
cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask);
cs->wd_last = csnow;
/* Check the delta. Might remove from the list ! */
clocksource_ratewd(cs, cs_nsec - wd_nsec);
} }
}
if (!list_empty(&watchdog_list)) { /* Check the deviation from the watchdog clocksource. */
/* cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
* Cycle through CPUs to check if the CPUs stay cs->mask, cs->mult, cs->shift);
* synchronized to each other. cs->wd_last = csnow;
*/ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
int next_cpu = cpumask_next(raw_smp_processor_id(), clocksource_unstable(cs, cs_nsec - wd_nsec);
cpu_online_mask); continue;
}
if (next_cpu >= nr_cpu_ids) if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
next_cpu = cpumask_first(cpu_online_mask); (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
watchdog_timer.expires += WATCHDOG_INTERVAL; (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
add_timer_on(&watchdog_timer, next_cpu); cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/*
* We just marked the clocksource as highres-capable,
* notify the rest of the system as well so that we
* transition into high-res mode:
*/
tick_clock_notify();
}
} }
/*
* Cycle through CPUs to check if the CPUs stay synchronized
* to each other.
*/
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
watchdog_timer.expires += WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, next_cpu);
out:
spin_unlock(&watchdog_lock); spin_unlock(&watchdog_lock);
} }
static inline void clocksource_start_watchdog(void)
{
if (watchdog_running || !watchdog || list_empty(&watchdog_list))
return;
init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
watchdog_last = watchdog->read(watchdog);
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
watchdog_running = 1;
}
static inline void clocksource_stop_watchdog(void)
{
if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
return;
del_timer(&watchdog_timer);
watchdog_running = 0;
}
static inline void clocksource_reset_watchdog(void)
{
struct clocksource *cs;
list_for_each_entry(cs, &watchdog_list, wd_list)
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}
static void clocksource_resume_watchdog(void) static void clocksource_resume_watchdog(void)
{ {
set_bit(0, &watchdog_resumed); unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
clocksource_reset_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
} }
static void clocksource_check_watchdog(struct clocksource *cs) static void clocksource_enqueue_watchdog(struct clocksource *cs)
{ {
struct clocksource *cse;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags); spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
int started = !list_empty(&watchdog_list); /* cs is a clocksource to be watched. */
list_add(&cs->wd_list, &watchdog_list); list_add(&cs->wd_list, &watchdog_list);
if (!started && watchdog) { cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
watchdog_last = watchdog->read(watchdog);
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer,
cpumask_first(cpu_online_mask));
}
} else { } else {
/* cs is a watchdog. */
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
/* Pick the best watchdog. */
if (!watchdog || cs->rating > watchdog->rating) { if (!watchdog || cs->rating > watchdog->rating) {
if (watchdog)
del_timer(&watchdog_timer);
watchdog = cs; watchdog = cs;
init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
/* Reset watchdog cycles */ /* Reset watchdog cycles */
list_for_each_entry(cse, &watchdog_list, wd_list) clocksource_reset_watchdog();
cse->flags &= ~CLOCK_SOURCE_WATCHDOG; }
/* Start if list is not empty */ }
if (!list_empty(&watchdog_list)) { /* Check if the watchdog timer needs to be started. */
watchdog_last = watchdog->read(watchdog); clocksource_start_watchdog();
watchdog_timer.expires = spin_unlock_irqrestore(&watchdog_lock, flags);
jiffies + WATCHDOG_INTERVAL; }
add_timer_on(&watchdog_timer,
cpumask_first(cpu_online_mask)); static void clocksource_dequeue_watchdog(struct clocksource *cs)
} {
struct clocksource *tmp;
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
/* cs is a watched clocksource. */
list_del_init(&cs->wd_list);
} else if (cs == watchdog) {
/* Reset watchdog cycles */
clocksource_reset_watchdog();
/* Current watchdog is removed. Find an alternative. */
watchdog = NULL;
list_for_each_entry(tmp, &clocksource_list, list) {
if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
continue;
if (!watchdog || tmp->rating > watchdog->rating)
watchdog = tmp;
} }
} }
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
/* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags); spin_unlock_irqrestore(&watchdog_lock, flags);
} }
#else
static void clocksource_check_watchdog(struct clocksource *cs) static int clocksource_watchdog_kthread(void *data)
{
struct clocksource *cs, *tmp;
unsigned long flags;
LIST_HEAD(unstable);
mutex_lock(&clocksource_mutex);
spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
list_add(&cs->wd_list, &unstable);
}
/* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
/* Needs to be done outside of watchdog lock */
list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
list_del_init(&cs->wd_list);
__clocksource_change_rating(cs, 0);
}
mutex_unlock(&clocksource_mutex);
return 0;
}
#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
static void clocksource_enqueue_watchdog(struct clocksource *cs)
{ {
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
} }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { } static inline void clocksource_resume_watchdog(void) { }
#endif static inline int clocksource_watchdog_kthread(void *data) { return 0; }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
/** /**
* clocksource_resume - resume the clocksource(s) * clocksource_resume - resume the clocksource(s)
...@@ -294,18 +393,16 @@ static inline void clocksource_resume_watchdog(void) { } ...@@ -294,18 +393,16 @@ static inline void clocksource_resume_watchdog(void) { }
void clocksource_resume(void) void clocksource_resume(void)
{ {
struct clocksource *cs; struct clocksource *cs;
unsigned long flags;
spin_lock_irqsave(&clocksource_lock, flags); mutex_lock(&clocksource_mutex);
list_for_each_entry(cs, &clocksource_list, list) { list_for_each_entry(cs, &clocksource_list, list)
if (cs->resume) if (cs->resume)
cs->resume(); cs->resume();
}
clocksource_resume_watchdog(); clocksource_resume_watchdog();
spin_unlock_irqrestore(&clocksource_lock, flags); mutex_unlock(&clocksource_mutex);
} }
/** /**
...@@ -320,75 +417,94 @@ void clocksource_touch_watchdog(void) ...@@ -320,75 +417,94 @@ void clocksource_touch_watchdog(void)
clocksource_resume_watchdog(); clocksource_resume_watchdog();
} }
#ifdef CONFIG_GENERIC_TIME
/** /**
* clocksource_get_next - Returns the selected clocksource * clocksource_select - Select the best clocksource available
*
* Private function. Must hold clocksource_mutex when called.
* *
* Select the clocksource with the best rating, or the clocksource,
* which is selected by userspace override.
*/ */
struct clocksource *clocksource_get_next(void) static void clocksource_select(void)
{ {
unsigned long flags; struct clocksource *best, *cs;
spin_lock_irqsave(&clocksource_lock, flags); if (!finished_booting || list_empty(&clocksource_list))
if (next_clocksource && finished_booting) { return;
curr_clocksource = next_clocksource; /* First clocksource on the list has the best rating. */
next_clocksource = NULL; best = list_first_entry(&clocksource_list, struct clocksource, list);
/* Check for the override clocksource. */
list_for_each_entry(cs, &clocksource_list, list) {
if (strcmp(cs->name, override_name) != 0)
continue;
/*
* Check to make sure we don't switch to a non-highres
* capable clocksource if the tick code is in oneshot
* mode (highres or nohz)
*/
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
tick_oneshot_mode_active()) {
/* Override clocksource cannot be used. */
printk(KERN_WARNING "Override clocksource %s is not "
"HRT compatible. Cannot switch while in "
"HRT/NOHZ mode\n", cs->name);
override_name[0] = 0;
} else
/* Override clocksource can be used. */
best = cs;
break;
}
if (curr_clocksource != best) {
printk(KERN_INFO "Switching to clocksource %s\n", best->name);
curr_clocksource = best;
timekeeping_notify(curr_clocksource);
} }
spin_unlock_irqrestore(&clocksource_lock, flags);
return curr_clocksource;
} }
/** #else /* CONFIG_GENERIC_TIME */
* select_clocksource - Selects the best registered clocksource.
* static inline void clocksource_select(void) { }
* Private function. Must hold clocksource_lock when called.
#endif
/*
* clocksource_done_booting - Called near the end of core bootup
* *
* Select the clocksource with the best rating, or the clocksource, * Hack to avoid lots of clocksource churn at boot time.
* which is selected by userspace override. * We use fs_initcall because we want this to start before
* device_initcall but after subsys_initcall.
*/ */
static struct clocksource *select_clocksource(void) static int __init clocksource_done_booting(void)
{ {
struct clocksource *next; finished_booting = 1;
if (list_empty(&clocksource_list))
return NULL;
if (clocksource_override)
next = clocksource_override;
else
next = list_entry(clocksource_list.next, struct clocksource,
list);
if (next == curr_clocksource) /*
return NULL; * Run the watchdog first to eliminate unstable clock sources
*/
clocksource_watchdog_kthread(NULL);
return next; mutex_lock(&clocksource_mutex);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
} }
fs_initcall(clocksource_done_booting);
/* /*
* Enqueue the clocksource sorted by rating * Enqueue the clocksource sorted by rating
*/ */
static int clocksource_enqueue(struct clocksource *c) static void clocksource_enqueue(struct clocksource *cs)
{ {
struct list_head *tmp, *entry = &clocksource_list; struct list_head *entry = &clocksource_list;
struct clocksource *tmp;
list_for_each(tmp, &clocksource_list) { list_for_each_entry(tmp, &clocksource_list, list)
struct clocksource *cs;
cs = list_entry(tmp, struct clocksource, list);
if (cs == c)
return -EBUSY;
/* Keep track of the place, where to insert */ /* Keep track of the place, where to insert */
if (cs->rating >= c->rating) if (tmp->rating >= cs->rating)
entry = tmp; entry = &tmp->list;
} list_add(&cs->list, entry);
list_add(&c->list, entry);
if (strlen(c->name) == strlen(override_name) &&
!strcmp(c->name, override_name))
clocksource_override = c;
return 0;
} }
/**
@@ -397,52 +513,48 @@ static int clocksource_enqueue(struct clocksource *c)
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 */
-int clocksource_register(struct clocksource *c)
-{
-unsigned long flags;
-int ret;
-spin_lock_irqsave(&clocksource_lock, flags);
-ret = clocksource_enqueue(c);
-if (!ret)
-next_clocksource = select_clocksource();
-spin_unlock_irqrestore(&clocksource_lock, flags);
-if (!ret)
-clocksource_check_watchdog(c);
-return ret;
-}
int clocksource_register(struct clocksource *cs)
{
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_select();
clocksource_enqueue_watchdog(cs);
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL(clocksource_register);
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
list_del(&cs->list);
cs->rating = rating;
clocksource_enqueue(cs);
clocksource_select();
}
/**
 * clocksource_change_rating - Change the rating of a registered clocksource
- *
 */
-void clocksource_change_rating(struct clocksource *cs, int rating)
-{
-unsigned long flags;
-spin_lock_irqsave(&clocksource_lock, flags);
-list_del(&cs->list);
-cs->rating = rating;
-clocksource_enqueue(cs);
-next_clocksource = select_clocksource();
-spin_unlock_irqrestore(&clocksource_lock, flags);
-}
void clocksource_change_rating(struct clocksource *cs, int rating)
{
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);
/**
 * clocksource_unregister - remove a registered clocksource
 */
void clocksource_unregister(struct clocksource *cs)
{
-unsigned long flags;
-spin_lock_irqsave(&clocksource_lock, flags);
mutex_lock(&clocksource_mutex);
clocksource_dequeue_watchdog(cs);
list_del(&cs->list);
-if (clocksource_override == cs)
-clocksource_override = NULL;
-next_clocksource = select_clocksource();
-spin_unlock_irqrestore(&clocksource_lock, flags);
clocksource_select();
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
@@ -458,9 +570,9 @@ sysfs_show_current_clocksources(struct sys_device *dev,
{
ssize_t count = 0;
-spin_lock_irq(&clocksource_lock);
mutex_lock(&clocksource_mutex);
count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
-spin_unlock_irq(&clocksource_lock);
mutex_unlock(&clocksource_mutex);
return count;
}
@@ -478,9 +590,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t count)
{
-struct clocksource *ovr = NULL;
size_t ret = count;
-int len;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(override_name))
@@ -490,44 +600,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
if (buf[count-1] == '\n')
count--;
-spin_lock_irq(&clocksource_lock);
mutex_lock(&clocksource_mutex);
if (count > 0)
memcpy(override_name, buf, count);
override_name[count] = 0;
clocksource_select();
-len = strlen(override_name);
-if (len) {
-struct clocksource *cs;
-ovr = clocksource_override;
-/* try to select it: */
-list_for_each_entry(cs, &clocksource_list, list) {
-if (strlen(cs->name) == len &&
-!strcmp(cs->name, override_name))
-ovr = cs;
-}
-}
-/*
- * Check to make sure we don't switch to a non-highres capable
- * clocksource if the tick code is in oneshot mode (highres or nohz)
- */
-if (tick_oneshot_mode_active() && ovr &&
-!(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
-printk(KERN_WARNING "%s clocksource is not HRT compatible. "
-"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
-ovr = NULL;
-override_name[0] = 0;
-}
-/* Reselect, when the override name has changed */
-if (ovr != clocksource_override) {
-clocksource_override = ovr;
-next_clocksource = select_clocksource();
-}
-spin_unlock_irq(&clocksource_lock);
mutex_unlock(&clocksource_mutex);
return ret;
}
@@ -547,7 +627,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
struct clocksource *src;
ssize_t count = 0;
-spin_lock_irq(&clocksource_lock);
mutex_lock(&clocksource_mutex);
list_for_each_entry(src, &clocksource_list, list) {
/*
 * Don't show non-HRES clocksource if the tick code is
@@ -559,7 +639,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
"%s ", src->name);
}
-spin_unlock_irq(&clocksource_lock);
mutex_unlock(&clocksource_mutex);
count += snprintf(buf + count,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
@@ -614,11 +694,10 @@ device_initcall(init_clocksource_sysfs);
 */
static int __init boot_override_clocksource(char* str)
{
-unsigned long flags;
-spin_lock_irqsave(&clocksource_lock, flags);
mutex_lock(&clocksource_mutex);
if (str)
strlcpy(override_name, str, sizeof(override_name));
-spin_unlock_irqrestore(&clocksource_lock, flags);
mutex_unlock(&clocksource_mutex);
return 1;
}
@@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = {
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
-.mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT,
.shift = JIFFIES_SHIFT,
};
@@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void)
}
core_initcall(init_jiffies_clocksource);
struct clocksource * __init __weak clocksource_default_clock(void)
{
return &clocksource_jiffies;
}
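clocksource_default_clock() is a weak symbol: an architecture may link in a strong definition of its own, and everyone else falls back to jiffies. The pattern, reduced to a stand-alone sketch (function name here is illustrative only):

#include <stdio.h>

/* Weak default, as with clocksource_default_clock(): a strong
 * definition elsewhere in the program would take precedence. */
__attribute__((weak)) const char *default_clock_name(void)
{
	return "jiffies";
}

int main(void)
{
	/* With no strong override linked in, the weak fallback runs. */
	printf("default clocksource: %s\n", default_clock_name());
	return 0;
}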
@@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
case TIME_OK:
break;
case TIME_INS:
-xtime.tv_sec--;
-wall_to_monotonic.tv_sec++;
timekeeping_leap_insert(-1);
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
@@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
res = HRTIMER_RESTART;
break;
case TIME_DEL:
-xtime.tv_sec++;
timekeeping_leap_insert(1);
time_tai--;
-wall_to_monotonic.tv_sec--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
@@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
time_state = TIME_OK;
break;
}
-update_vsyscall(&xtime, clock);
write_sequnlock(&xtime_lock);
@@ -18,7 +18,117 @@
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
/* Structure holding internal timekeeping values. */
struct timekeeper {
/* Current clocksource used for timekeeping. */
struct clocksource *clock;
/* The shift value of the current clocksource. */
int shift;
/* Number of clock cycles in one NTP interval. */
cycle_t cycle_interval;
/* Number of clock shifted nano seconds in one NTP interval. */
u64 xtime_interval;
/* Raw nano seconds accumulated per NTP interval. */
u32 raw_interval;
/* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
u64 xtime_nsec;
/* Difference between accumulated time and NTP time in ntp
* shifted nano seconds. */
s64 ntp_error;
/* Shift conversion between clock shifted nano seconds and
* ntp shifted nano seconds. */
int ntp_error_shift;
/* NTP adjusted clock multiplier */
u32 mult;
};
struct timekeeper timekeeper;
/**
* timekeeper_setup_internals - Set up internals to use clocksource clock.
*
* @clock: Pointer to clocksource.
*
* Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
* pair and interval request.
*
* Unless you're the timekeeping code, you should not be using this!
*/
static void timekeeper_setup_internals(struct clocksource *clock)
{
cycle_t interval;
u64 tmp;
timekeeper.clock = clock;
clock->cycle_last = clock->read(clock);
/* Do the ns -> cycle conversion first, using original mult */
tmp = NTP_INTERVAL_LENGTH;
tmp <<= clock->shift;
tmp += clock->mult/2;
do_div(tmp, clock->mult);
if (tmp == 0)
tmp = 1;
interval = (cycle_t) tmp;
timekeeper.cycle_interval = interval;
/* Go back from cycles -> shifted ns */
timekeeper.xtime_interval = (u64) interval * clock->mult;
timekeeper.raw_interval =
((u64) interval * clock->mult) >> clock->shift;
timekeeper.xtime_nsec = 0;
timekeeper.shift = clock->shift;
timekeeper.ntp_error = 0;
timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
/*
* The timekeeper keeps its own mult values for the currently
* active clocksource. These value will be adjusted via NTP
* to counteract clock drifting.
*/
timekeeper.mult = clock->mult;
}
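The setup fixes the accumulation grain: the NTP interval length in nanoseconds is converted to a whole number of clock cycles, then back to shifted nanoseconds, and the rounding loss is what ntp_error later tracks. A user-space rerun of that arithmetic, with a hypothetical 15 MHz counter (about 66.7 ns per cycle) and a 10 ms NTP interval assumed:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical clocksource: ns = (cycles * mult) >> shift. */
	uint32_t mult = 279620267, shift = 22;	/* ~66.7 ns per cycle */
	uint64_t ntp_interval_ns = 10000000;	/* assume 10 ms interval */

	/* ns -> cycles, rounded, as timekeeper_setup_internals() does: */
	uint64_t tmp = (ntp_interval_ns << shift) + mult / 2;
	tmp /= mult;
	if (tmp == 0)
		tmp = 1;
	uint64_t cycle_interval = tmp;

	/* cycles -> shifted ns; truncation here becomes ntp_error. */
	uint64_t xtime_interval = cycle_interval * mult;

	printf("cycle_interval = %llu cycles\n",
	       (unsigned long long)cycle_interval);
	printf("xtime_interval = %llu shifted-ns (~%llu ns)\n",
	       (unsigned long long)xtime_interval,
	       (unsigned long long)(xtime_interval >> shift));
	return 0;
}

With these numbers the interval comes out to 150000 cycles, i.e. the 10 ms target almost exactly.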
/* Timekeeper helper functions. */
static inline s64 timekeeping_get_ns(void)
{
cycle_t cycle_now, cycle_delta;
struct clocksource *clock;
/* read clocksource: */
clock = timekeeper.clock;
cycle_now = clock->read(clock);
/* calculate the delta since the last update_wall_time: */
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
/* return delta convert to nanoseconds using ntp adjusted mult. */
return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
timekeeper.shift);
}
static inline s64 timekeeping_get_ns_raw(void)
{
cycle_t cycle_now, cycle_delta;
struct clocksource *clock;
/* read clocksource: */
clock = timekeeper.clock;
cycle_now = clock->read(clock);
/* calculate the delta since the last update_wall_time: */
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
/* return delta convert to nanoseconds using ntp adjusted mult. */
return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
}
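Both helpers reduce to clocksource_cyc2ns(), which is (cycles * mult) >> shift; masking the cycle delta first is what makes wraparound of a narrow counter harmless. The computation as a stand-alone sketch:

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as clocksource_cyc2ns(): ns = (cycles * mult) >> shift */
static int64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return ((uint64_t)cycles * mult) >> shift;
}

int main(void)
{
	/* Hypothetical 32-bit counter that wrapped between two reads. */
	uint64_t mask = 0xffffffff;
	uint64_t cycle_last = 0xfffffff0, cycle_now = 0x00000010;

	/* The mask turns the huge unsigned difference into 0x20. */
	uint64_t delta = (cycle_now - cycle_last) & mask;

	/* With mult = 1 << shift the clock is exactly 1 ns per cycle. */
	printf("delta=%llu cycles -> %lld ns\n",
	       (unsigned long long)delta,
	       (long long)cyc2ns(delta, 1 << 20, 20));
	return 0;
}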
/*
 * This read-write spinlock protects us from races in SMP while
@@ -44,7 +154,12 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 */
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
-static unsigned long total_sleep_time; /* seconds */
static struct timespec total_sleep_time;
/*
 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
 */
struct timespec raw_time;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -56,35 +171,44 @@ void update_xtime_cache(u64 nsec)
timespec_add_ns(&xtime_cache, nsec);
}
-struct clocksource *clock; /* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
xtime.tv_sec += leapsecond;
wall_to_monotonic.tv_sec -= leapsecond;
update_vsyscall(&xtime, timekeeper.clock);
}
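timekeeping_leap_insert() shifts the wall clock while keeping the sum xtime + wall_to_monotonic constant, so CLOCK_MONOTONIC never sees the leap second. A quick check of that invariant, with timespec reduced to whole seconds:

#include <stdio.h>

int main(void)
{
	/* Seconds-only model of xtime and wall_to_monotonic. */
	long xtime = 1000000, wall_to_mono = -999000;
	long mono_before = xtime + wall_to_mono;

	/* timekeeping_leap_insert(-1): insert a leap second. */
	int leapsecond = -1;
	xtime += leapsecond;
	wall_to_mono -= leapsecond;

	printf("monotonic before=%ld after=%ld (unchanged)\n",
	       mono_before, xtime + wall_to_mono);
	return 0;
}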
#ifdef CONFIG_GENERIC_TIME
/**
- * clocksource_forward_now - update clock to the current time
 * timekeeping_forward_now - update clock to the current time
 *
 * Forward the current clock to update its state since the last call to
 * update_wall_time(). This is useful before significant clock changes,
 * as it avoids having to deal with this time offset explicitly.
 */
-static void clocksource_forward_now(void)
static void timekeeping_forward_now(void)
{
cycle_t cycle_now, cycle_delta;
struct clocksource *clock;
s64 nsec;
-cycle_now = clocksource_read(clock);
clock = timekeeper.clock;
cycle_now = clock->read(clock);
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
clock->cycle_last = cycle_now;
-nsec = cyc2ns(clock, cycle_delta);
nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, timekeeper.shift);
/* If arch requires, add in gettimeoffset() */
nsec += arch_gettimeoffset();
timespec_add_ns(&xtime, nsec);
-nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
-clock->raw_time.tv_nsec += nsec;
nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
timespec_add_ns(&raw_time, nsec);
}
/**
@@ -95,7 +219,6 @@ static void clocksource_forward_now(void)
 */
void getnstimeofday(struct timespec *ts)
{
-cycle_t cycle_now, cycle_delta;
unsigned long seq;
s64 nsecs;
@@ -105,15 +228,7 @@ void getnstimeofday(struct timespec *ts)
seq = read_seqbegin(&xtime_lock);
*ts = xtime;
-/* read clocksource: */
-cycle_now = clocksource_read(clock);
-/* calculate the delta since the last update_wall_time: */
-cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-/* convert to nanoseconds: */
-nsecs = cyc2ns(clock, cycle_delta);
nsecs = timekeeping_get_ns();
/* If arch requires, add in gettimeoffset() */
nsecs += arch_gettimeoffset();
@@ -125,6 +240,57 @@ void getnstimeofday(struct timespec *ts)
EXPORT_SYMBOL(getnstimeofday);
ktime_t ktime_get(void)
{
unsigned int seq;
s64 secs, nsecs;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqbegin(&xtime_lock);
secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
nsecs += timekeeping_get_ns();
} while (read_seqretry(&xtime_lock, seq));
/*
* Use ktime_set/ktime_add_ns to create a proper ktime on
* 32-bit architectures without CONFIG_KTIME_SCALAR.
*/
return ktime_add_ns(ktime_set(secs, 0), nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);
/**
* ktime_get_ts - get the monotonic clock in timespec format
* @ts: pointer to timespec variable
*
* The function calculates the monotonic clock from the realtime
* clock and the wall_to_monotonic offset and stores the result
* in normalized timespec format in the variable pointed to by @ts.
*/
void ktime_get_ts(struct timespec *ts)
{
struct timespec tomono;
unsigned int seq;
s64 nsecs;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqbegin(&xtime_lock);
*ts = xtime;
tomono = wall_to_monotonic;
nsecs = timekeeping_get_ns();
} while (read_seqretry(&xtime_lock, seq));
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
ts->tv_nsec + tomono.tv_nsec + nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_ts);
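Both getters use the same retry idiom: sample the sequence counter, copy the values lock-free, and retry if a writer raced past. A stand-alone model of the read side (memory barriers that a real seqlock needs are omitted, and the kernel's write side additionally serializes writers):

#include <stdio.h>

/* Toy seqcount: a writer makes it odd while updating; readers retry
 * until they see the same even value before and after the copy. */
static volatile unsigned seq;
static long tv_sec, tv_nsec;

static void read_time(long *sec, long *nsec)
{
	unsigned start;

	do {
		while ((start = seq) & 1)
			;		/* writer in progress, spin */
		*sec = tv_sec;		/* copy out without a lock */
		*nsec = tv_nsec;
	} while (seq != start);		/* retry if a writer raced us */
}

int main(void)
{
	long s, ns;

	tv_sec = 1234; tv_nsec = 567;	/* single-threaded demo */
	read_time(&s, &ns);
	printf("%ld.%09ld\n", s, ns);
	return 0;
}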
/**
 * do_gettimeofday - Returns the time of day in a timeval
 * @tv: pointer to the timeval to be set
@@ -157,7 +323,7 @@ int do_settimeofday(struct timespec *tv)
write_seqlock_irqsave(&xtime_lock, flags);
-clocksource_forward_now();
timekeeping_forward_now();
ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
@@ -167,10 +333,10 @@ int do_settimeofday(struct timespec *tv)
update_xtime_cache(0);
-clock->error = 0;
timekeeper.ntp_error = 0;
ntp_clear();
-update_vsyscall(&xtime, clock);
update_vsyscall(&xtime, timekeeper.clock);
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -187,44 +353,97 @@ EXPORT_SYMBOL(do_settimeofday);
 *
 * Accumulates current time interval and initializes new clocksource
 */
-static void change_clocksource(void)
-{
-struct clocksource *new, *old;
-new = clocksource_get_next();
-if (clock == new)
-return;
-clocksource_forward_now();
-if (clocksource_enable(new))
-return;
-new->raw_time = clock->raw_time;
-old = clock;
-clock = new;
-clocksource_disable(old);
-clock->cycle_last = 0;
-clock->cycle_last = clocksource_read(clock);
-clock->error = 0;
-clock->xtime_nsec = 0;
-clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
-tick_clock_notify();
-/*
- * We're holding xtime lock and waking up klogd would deadlock
- * us on enqueue. So no printing!
-printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-clock->name);
- */
-}
-#else
-static inline void clocksource_forward_now(void) { }
-static inline void change_clocksource(void) { }
-#endif
static int change_clocksource(void *data)
{
struct clocksource *new, *old;
new = (struct clocksource *) data;
timekeeping_forward_now();
if (!new->enable || new->enable(new) == 0) {
old = timekeeper.clock;
timekeeper_setup_internals(new);
if (old->disable)
old->disable(old);
}
return 0;
}
/**
 * timekeeping_notify - Install a new clock source
 * @clock: pointer to the clock source
 *
 * This function is called from clocksource.c after a new, better clock
 * source has been registered. The caller holds the clocksource_mutex.
 */
void timekeeping_notify(struct clocksource *clock)
{
if (timekeeper.clock == clock)
return;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
}
#else /* GENERIC_TIME */
static inline void timekeeping_forward_now(void) { }
/**
 * ktime_get - get the monotonic time in ktime_t format
 *
 * returns the time in ktime_t format
 */
ktime_t ktime_get(void)
{
struct timespec now;
ktime_get_ts(&now);
return timespec_to_ktime(now);
}
EXPORT_SYMBOL_GPL(ktime_get);
/**
 * ktime_get_ts - get the monotonic clock in timespec format
 * @ts: pointer to timespec variable
 *
 * The function calculates the monotonic clock from the realtime
 * clock and the wall_to_monotonic offset and stores the result
 * in normalized timespec format in the variable pointed to by @ts.
 */
void ktime_get_ts(struct timespec *ts)
{
struct timespec tomono;
unsigned long seq;
do {
seq = read_seqbegin(&xtime_lock);
getnstimeofday(ts);
tomono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
ts->tv_nsec + tomono.tv_nsec);
}
EXPORT_SYMBOL_GPL(ktime_get_ts);
#endif /* !GENERIC_TIME */
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
*
* returns the time in ktime_t format
*/
ktime_t ktime_get_real(void)
{
struct timespec now;
getnstimeofday(&now);
return timespec_to_ktime(now);
}
EXPORT_SYMBOL_GPL(ktime_get_real);
/**
 * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -236,21 +455,11 @@ void getrawmonotonic(struct timespec *ts)
{
unsigned long seq;
s64 nsecs;
-cycle_t cycle_now, cycle_delta;
do {
seq = read_seqbegin(&xtime_lock);
-/* read clocksource: */
-cycle_now = clocksource_read(clock);
-/* calculate the delta since the last update_wall_time: */
-cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-/* convert to nanoseconds: */
-nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
-*ts = clock->raw_time;
nsecs = timekeeping_get_ns_raw();
*ts = raw_time;
} while (read_seqretry(&xtime_lock, seq));
@@ -270,7 +479,7 @@ int timekeeping_valid_for_hres(void)
do {
seq = read_seqbegin(&xtime_lock);
-ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqretry(&xtime_lock, seq));
@@ -278,17 +487,33 @@ int timekeeping_valid_for_hres(void)
}
/**
- * read_persistent_clock - Return time in seconds from the persistent clock.
 * read_persistent_clock - Return time from the persistent clock.
 *
 * Weak dummy function for arches that do not yet support it.
- * Returns seconds from epoch using the battery backed persistent clock.
- * Returns zero if unsupported.
 * Reads the time from the battery backed persistent clock.
 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
 *
 * XXX - Do be sure to remove it once all arches implement it.
 */
-unsigned long __attribute__((weak)) read_persistent_clock(void)
-{
-return 0;
-}
void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
}
/**
 * read_boot_clock - Return time of the system start.
 *
 * Weak dummy function for arches that do not yet support it.
 * Function to read the exact time the system has been started.
 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
 *
 * XXX - Do be sure to remove it once all arches implement it.
 */
void __attribute__((weak)) read_boot_clock(struct timespec *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
}
/*
@@ -296,29 +521,40 @@ unsigned long __attribute__((weak)) read_persistent_clock(void)
 */
void __init timekeeping_init(void)
{
struct clocksource *clock;
unsigned long flags;
-unsigned long sec = read_persistent_clock();
struct timespec now, boot;
read_persistent_clock(&now);
read_boot_clock(&boot);
write_seqlock_irqsave(&xtime_lock, flags);
ntp_init();
-clock = clocksource_get_next();
-clocksource_enable(clock);
-clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
-clock->cycle_last = clocksource_read(clock);
clock = clocksource_default_clock();
if (clock->enable)
clock->enable(clock);
timekeeper_setup_internals(clock);
-xtime.tv_sec = sec;
-xtime.tv_nsec = 0;
xtime.tv_sec = now.tv_sec;
xtime.tv_nsec = now.tv_nsec;
raw_time.tv_sec = 0;
raw_time.tv_nsec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
boot.tv_sec = xtime.tv_sec;
boot.tv_nsec = xtime.tv_nsec;
}
-set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec);
update_xtime_cache(0);
-total_sleep_time = 0;
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
}
/* time in seconds when suspend began */
-static unsigned long timekeeping_suspend_time;
static struct timespec timekeeping_suspend_time;
/**
 * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -331,24 +567,24 @@ static unsigned long timekeeping_suspend_time;
static int timekeeping_resume(struct sys_device *dev)
{
unsigned long flags;
-unsigned long now = read_persistent_clock();
struct timespec ts;
read_persistent_clock(&ts);
clocksource_resume();
write_seqlock_irqsave(&xtime_lock, flags);
-if (now && (now > timekeeping_suspend_time)) {
-unsigned long sleep_length = now - timekeeping_suspend_time;
-xtime.tv_sec += sleep_length;
-wall_to_monotonic.tv_sec -= sleep_length;
-total_sleep_time += sleep_length;
if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
ts = timespec_sub(ts, timekeeping_suspend_time);
xtime = timespec_add_safe(xtime, ts);
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts);
}
update_xtime_cache(0);
/* re-base the last cycle value */
-clock->cycle_last = 0;
-clock->cycle_last = clocksource_read(clock);
-clock->error = 0;
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
timekeeping_suspended = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
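The resume path folds the slept time into all three clocks at once: wall time moves forward, the monotonic offset moves back by the same amount, and the boot-based total grows. In seconds-only form (a model of the bookkeeping, not kernel code; all values hypothetical):

#include <stdio.h>

int main(void)
{
	/* Seconds-only model of timekeeping_resume(). */
	long suspend_time = 5000, resume_time = 5042;	/* persistent clock */
	long xtime = 700042, wall_to_mono = -700000, total_sleep = 0;

	if (resume_time > suspend_time) {
		long slept = resume_time - suspend_time;	/* 42 s asleep */
		xtime += slept;			/* wall clock catches up */
		wall_to_mono -= slept;		/* monotonic stands still */
		total_sleep += slept;		/* boot-based keeps counting */
	}
	printf("wall=%ld monotonic=%ld boot-based=%ld\n",
	       xtime, xtime + wall_to_mono,
	       xtime + wall_to_mono + total_sleep);
	return 0;
}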
@@ -366,10 +602,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
{
unsigned long flags;
-timekeeping_suspend_time = read_persistent_clock();
read_persistent_clock(&timekeeping_suspend_time);
write_seqlock_irqsave(&xtime_lock, flags);
-clocksource_forward_now();
timekeeping_forward_now();
timekeeping_suspended = 1;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -404,7 +640,7 @@ device_initcall(timekeeping_init_device);
 * If the error is already larger, we look ahead even further
 * to compensate for late or lost adjustments.
 */
-static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
s64 *offset)
{
s64 tick_error, i;
@@ -420,7 +656,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
 * here. This is tuned so that an error of about 1 msec is adjusted
 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
 */
-error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
error2 = abs(error2);
for (look_ahead = 0; error2 > 0; look_ahead++)
error2 >>= 2;
@@ -429,8 +665,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
 * Now calculate the error in (1 << look_ahead) ticks, but first
 * remove the single look ahead already included in the error.
 */
-tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1);
-tick_error -= clock->xtime_interval >> 1;
tick_error = tick_length >> (timekeeper.ntp_error_shift + 1);
tick_error -= timekeeper.xtime_interval >> 1;
error = ((error - tick_error) >> look_ahead) + tick_error;
/* Finally calculate the adjustment shift value. */
@@ -455,18 +691,18 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
 * this is optimized for the most common adjustments of -1,0,1,
 * for other values we can do a bit more work.
 */
-static void clocksource_adjust(s64 offset)
static void timekeeping_adjust(s64 offset)
{
-s64 error, interval = clock->cycle_interval;
s64 error, interval = timekeeper.cycle_interval;
int adj;
-error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1);
error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
if (error > interval) {
error >>= 2;
if (likely(error <= interval))
adj = 1;
else
-adj = clocksource_bigadjust(error, &interval, &offset);
adj = timekeeping_bigadjust(error, &interval, &offset);
} else if (error < -interval) {
error >>= 2;
if (likely(error >= -interval)) {
@@ -474,15 +710,15 @@ static void clocksource_adjust(s64 offset)
interval = -interval;
offset = -offset;
} else
-adj = clocksource_bigadjust(error, &interval, &offset);
adj = timekeeping_bigadjust(error, &interval, &offset);
} else
return;
-clock->mult += adj;
-clock->xtime_interval += interval;
-clock->xtime_nsec -= offset;
-clock->error -= (interval - offset) << (NTP_SCALE_SHIFT - clock->shift);
timekeeper.mult += adj;
timekeeper.xtime_interval += interval;
timekeeper.xtime_nsec -= offset;
timekeeper.ntp_error -= (interval - offset) << timekeeper.ntp_error_shift;
}
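The adjustment nudges the NTP-scaled multiplier in small steps so accumulated error decays instead of being corrected in one jump. A deliberately loose toy of that feedback (the real code scales its step size; the clock rate and units here are invented):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical hardware that runs ~300 ppm fast. */
	double true_ns_per_cycle = 10.003;
	uint32_t shift = 10;
	int64_t mult = (int64_t)(10.0 * (1 << shift));	/* nominal calib */
	int64_t error = 0;		/* plays the role of ntp_error */
	const int64_t cycles = 100000;	/* per accumulation interval */

	for (int tick = 0; tick < 6; tick++) {
		/* shifted-ns we accounted vs. what really elapsed */
		int64_t accounted = cycles * mult;
		int64_t actual =
			(int64_t)(cycles * true_ns_per_cycle * (1 << shift));
		error += actual - accounted;

		/* timekeeping_adjust()-style +/-1 nudge on the multiplier */
		if (error > cycles)
			mult++, error -= cycles;
		else if (error < -cycles)
			mult--, error += cycles;
		printf("tick %d: mult=%lld error=%lld\n", tick,
		       (long long)mult, (long long)error);
	}
	return 0;
}

Run it and mult climbs toward the true rate, then oscillates by one step around it, which is exactly the behavior the +/-1 fast path is tuned for.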
/**
@@ -492,53 +728,59 @@ static void clocksource_adjust(s64 offset)
 */
void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
u64 nsecs;
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
return;
clock = timekeeper.clock;
#ifdef CONFIG_GENERIC_TIME
-offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
#else
-offset = clock->cycle_interval;
offset = timekeeper.cycle_interval;
#endif
-clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
/* normally this loop will run just once, however in the
 * case of lost or late ticks, it will accumulate correctly.
 */
-while (offset >= clock->cycle_interval) {
while (offset >= timekeeper.cycle_interval) {
u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
/* accumulate one interval */
-offset -= clock->cycle_interval;
-clock->cycle_last += clock->cycle_interval;
offset -= timekeeper.cycle_interval;
clock->cycle_last += timekeeper.cycle_interval;
-clock->xtime_nsec += clock->xtime_interval;
-if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
-clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
timekeeper.xtime_nsec += timekeeper.xtime_interval;
if (timekeeper.xtime_nsec >= nsecps) {
timekeeper.xtime_nsec -= nsecps;
xtime.tv_sec++;
second_overflow();
}
-clock->raw_time.tv_nsec += clock->raw_interval;
-if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
-clock->raw_time.tv_nsec -= NSEC_PER_SEC;
-clock->raw_time.tv_sec++;
raw_time.tv_nsec += timekeeper.raw_interval;
if (raw_time.tv_nsec >= NSEC_PER_SEC) {
raw_time.tv_nsec -= NSEC_PER_SEC;
raw_time.tv_sec++;
}
/* accumulate error between NTP and clock interval */
-clock->error += tick_length;
-clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
timekeeper.ntp_error += tick_length;
timekeeper.ntp_error -= timekeeper.xtime_interval << timekeeper.ntp_error_shift;
}
/* correct the clock when NTP error is too big */
-clocksource_adjust(offset);
timekeeping_adjust(offset);
/*
 * Since in the loop above, we accumulate any amount of time
 * in xtime_nsec over a second into xtime.tv_sec, its possible for
 * xtime_nsec to be fairly small after the loop. Further, if we're
- * slightly speeding the clocksource up in clocksource_adjust(),
 * slightly speeding the clocksource up in timekeeping_adjust(),
 * its possible the required corrective factor to xtime_nsec could
 * cause it to underflow.
 *
@@ -550,24 +792,25 @@ void update_wall_time(void)
 * We'll correct this error next time through this function, when
 * xtime_nsec is not as small.
 */
-if (unlikely((s64)clock->xtime_nsec < 0)) {
-s64 neg = -(s64)clock->xtime_nsec;
-clock->xtime_nsec = 0;
-clock->error += neg << (NTP_SCALE_SHIFT - clock->shift);
if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
s64 neg = -(s64)timekeeper.xtime_nsec;
timekeeper.xtime_nsec = 0;
timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
}
/* store full nanoseconds into xtime after rounding it up and
 * add the remainder to the error difference.
 */
-xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
-clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
-clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift;
timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift;
-update_xtime_cache(cyc2ns(clock, offset));
nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
update_xtime_cache(nsecs);
/* check to see if there is a new clocksource to use */
-change_clocksource();
-update_vsyscall(&xtime, clock);
update_vsyscall(&xtime, timekeeper.clock);
}
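The accumulation loop in fixed point: each pass moves one cycle_interval from the counter into shifted nanoseconds, carrying whole seconds out as it goes. A compressed stand-alone version, with a hypothetical 10 ms grain and 2.5 intervals pending:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t shift = 8;
	const uint64_t nsecps = (uint64_t)1000000000 << shift;

	/* Hypothetical grain: 10 ms per NTP interval. */
	uint64_t cycle_interval = 10000;
	uint64_t xtime_interval = (uint64_t)10000000 << shift;

	uint64_t offset = 25000;	/* 2.5 intervals worth of cycles */
	/* start 15 ms before a full second, in shifted ns */
	uint64_t xtime_nsec = (uint64_t)985000000 << shift;
	long tv_sec = 0;

	/* update_wall_time()-style loop: only whole intervals move */
	while (offset >= cycle_interval) {
		offset -= cycle_interval;
		xtime_nsec += xtime_interval;
		if (xtime_nsec >= nsecps) {
			xtime_nsec -= nsecps;
			tv_sec++;	/* second_overflow() would run here */
		}
	}
	printf("accumulated +%ld s, xtime_nsec=%llu ns, offset left=%llu\n",
	       tv_sec, (unsigned long long)(xtime_nsec >> shift),
	       (unsigned long long)offset);
	return 0;
}

The half interval that did not fit stays in offset; it is reported via update_xtime_cache() but only accumulated on a later tick.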
/**
@@ -583,9 +826,12 @@ void update_wall_time(void)
 */
void getboottime(struct timespec *ts)
{
-set_normalized_timespec(ts, -(wall_to_monotonic.tv_sec + total_sleep_time), -wall_to_monotonic.tv_nsec);
struct timespec boottime = {
.tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
.tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
};
set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
}
/**
@@ -594,7 +840,7 @@ void getboottime(struct timespec *ts)
 */
void monotonic_to_bootbased(struct timespec *ts)
{
-ts->tv_sec += total_sleep_time;
*ts = timespec_add_safe(*ts, total_sleep_time);
}
unsigned long get_seconds(void)
@@ -603,6 +849,10 @@ unsigned long get_seconds(void)
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
return xtime_cache;
}
struct timespec current_kernel_time(void)
{
@@ -618,3 +868,20 @@ struct timespec current_kernel_time(void)
return now;
}
EXPORT_SYMBOL(current_kernel_time);
struct timespec get_monotonic_coarse(void)
{
struct timespec now, mono;
unsigned long seq;
do {
seq = read_seqbegin(&xtime_lock);
now = xtime_cache;
mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
now.tv_nsec + mono.tv_nsec);
return now;
}
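get_monotonic_coarse() backs the new CLOCK_MONOTONIC_COARSE posix clock, the monotonic sibling of the CLOCK_REALTIME_COARSE introduced in this series: tick-resolution time from xtime_cache without touching the clocksource hardware. From user space, assuming a kernel with this series applied:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec fine, coarse;

	/* Both are monotonic; the coarse one only advances with the
	 * timer tick but avoids the clocksource read entirely. */
	clock_gettime(CLOCK_MONOTONIC, &fine);
	clock_gettime(CLOCK_MONOTONIC_COARSE, &coarse);

	printf("fine:   %ld.%09ld\n", (long)fine.tv_sec, fine.tv_nsec);
	printf("coarse: %ld.%09ld\n", (long)coarse.tv_sec, coarse.tv_nsec);
	return 0;
}

On older glibc, link with -lrt for clock_gettime().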
@@ -72,6 +72,7 @@ struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
unsigned long timer_jiffies;
unsigned long next_timer;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -622,6 +623,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
if (timer_pending(timer)) {
detach_timer(timer, 0);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
} else {
if (pending_only)
@@ -663,6 +667,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
timer->expires = expires;
if (time_before(timer->expires, base->next_timer) &&
!tbase_get_deferrable(timer->base))
base->next_timer = timer->expires;
internal_add_timer(base, timer);
out_unlock:
@@ -781,6 +788,9 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_lock_irqsave(&base->lock, flags);
timer_set_base(timer, base);
debug_timer_activate(timer);
if (time_before(timer->expires, base->next_timer) &&
!tbase_get_deferrable(timer->base))
base->next_timer = timer->expires;
internal_add_timer(base, timer);
/*
 * Check whether the other CPU is idle and needs to be
@@ -817,6 +827,9 @@ int del_timer(struct timer_list *timer)
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
detach_timer(timer, 1);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
}
spin_unlock_irqrestore(&base->lock, flags);
@@ -850,6 +863,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
if (timer->expires == base->next_timer &&
!tbase_get_deferrable(timer->base))
base->next_timer = base->timer_jiffies;
ret = 1;
}
out:
@@ -1007,8 +1023,8 @@ static inline void __run_timers(struct tvec_base *base)
#ifdef CONFIG_NO_HZ
/*
 * Find out when the next timer event is due to happen. This
- * is used on S/390 to stop all activity when a cpus is idle.
- * This functions needs to be called disabled.
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
@@ -1134,7 +1150,9 @@ unsigned long get_next_timer_interrupt(unsigned long now)
unsigned long expires;
spin_lock(&base->lock);
-expires = __next_timer_interrupt(base);
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;
spin_unlock(&base->lock);
if (time_before_eq(expires, now))
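base->next_timer caches the earliest pending expiry so the NO_HZ idle path can usually skip rescanning the timer wheel; the add/del hunks above keep the cache conservative, and setting it back to timer_jiffies simply marks it stale. The discipline in miniature (plain compares here; the kernel uses time_before_eq() so the logic survives jiffies wrap, and scan() is a made-up stand-in for __next_timer_interrupt()):

#include <stdio.h>
#include <stdbool.h>

static unsigned long timer_jiffies = 100;	/* "now" */
static unsigned long next_timer = 100;		/* == timer_jiffies: stale */
static unsigned long expiries[] = { 160, 130, 190 };
static bool pending[] = { true, true, true };

static unsigned long scan(void)	/* stand-in for the wheel scan */
{
	unsigned long best = timer_jiffies + 3600;	/* far-away default */

	for (int i = 0; i < 3; i++)
		if (pending[i] && expiries[i] < best)
			best = expiries[i];
	return best;
}

int main(void)
{
	/* get_next_timer_interrupt(): rescan only when cache is stale. */
	if (next_timer <= timer_jiffies)
		next_timer = scan();
	printf("next expiry: %lu\n", next_timer);	/* 130 */

	/* del_timer() of the earliest timer just invalidates the cache. */
	pending[1] = false;
	next_timer = timer_jiffies;

	if (next_timer <= timer_jiffies)
		next_timer = scan();
	printf("next expiry: %lu\n", next_timer);	/* 160 */
	return 0;
}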
@@ -1522,6 +1540,7 @@ static int __cpuinit init_timers_cpu(int cpu)
INIT_LIST_HEAD(base->tv1.vec + j);
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
return 0;
}
@@ -1534,6 +1553,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
timer = list_first_entry(head, struct timer_list, entry);
detach_timer(timer, 0);
timer_set_base(timer, new_base);
if (time_before(timer->expires, new_base->next_timer) &&
!tbase_get_deferrable(timer->base))
new_base->next_timer = timer->expires;
internal_add_timer(new_base, timer);
}