Commit bbab4f3b authored by Zachary Amsden, committed by Andi Kleen

[PATCH] i386: VMI timer patches

VMI timer code.  It works by taking over the local APIC clock when the APIC is
configured, which requires a couple of hooks into the APIC code.  The backend
timer code could be factored into the common timer infrastructure, but some
pieces are still missing (stolen time, in particular), and the exact semantics
of when to do accounting for NO_IDLE_HZ need to be shared between different
hypervisors as well.  So for now, the VMI timer is a separate module.

[Adrian Bunk: cleanups]

Subject: VMI timer patches
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
parent 7ce0bcfd
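At a glance, the clock hook indirection this patch adds, as a minimal sketch condensed from the paravirt.h, paravirt.c, and apic.c hunks below (the struct is abridged, not the full paravirt_ops definition):

/* Sketch: clock setup becomes a pair of paravirt_ops hooks. */
struct paravirt_ops {
	/* ...existing hooks elided... */
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);
};

/* Native implementations, declared in the APIC code. */
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);

/* Native kernels keep the stock local APIC timer setup... */
struct paravirt_ops paravirt_ops = {
	.setup_boot_clock = setup_boot_APIC_clock,
	.setup_secondary_clock = setup_secondary_APIC_clock,
};

/* ...and callers such as APIC_init_uniprocessor() and start_secondary()
 * go through inline wrappers, so activate_vmi() can repoint the hooks
 * at the VMI alarm-based implementations. */
static inline void setup_boot_clock(void)
{
	paravirt_ops.setup_boot_clock();
}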
@@ -1272,3 +1272,12 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	bool
 	default y
+
+config NO_IDLE_HZ
+	bool
+	depends on PARAVIRT
+	default y
+	help
+	  Switches the regular HZ timer off when the system is going idle.
+	  This helps a hypervisor detect that the Linux system is idle,
+	  reducing the overhead of idle systems.
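The mechanism behind NO_IDLE_HZ shows up in the vmi.c hunk further down: vmi_safe_halt() stops the HZ timer before halting the vcpu and, if the timer was stopped, restarts it on wakeup after accounting the elapsed time.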
@@ -40,7 +40,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
-obj-$(CONFIG_VMI)		+= vmi.o
+obj-$(CONFIG_VMI)		+= vmi.o vmitime.o
 # Make sure this is linked after any other paravirt_ops structs: see head.S
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o
@@ -1395,7 +1395,7 @@ int __init APIC_init_uniprocessor (void)
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 #endif
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 	return 0;
 }
@@ -626,6 +626,11 @@ ENTRY(name) \
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
+/* This alternate entry is needed because we hijack the apic LVTT */
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+#endif
+
 KPROBE_ENTRY(page_fault)
 	RING0_EC_FRAME
 	pushl $do_page_fault
@@ -544,6 +544,8 @@ struct paravirt_ops paravirt_ops = {
 	.apic_write = native_apic_write,
 	.apic_write_atomic = native_apic_write_atomic,
 	.apic_read = native_apic_read,
+	.setup_boot_clock = setup_boot_APIC_clock,
+	.setup_secondary_clock = setup_secondary_APIC_clock,
 #endif
 	.set_lazy_mode = (void *)native_nop,
@@ -554,7 +554,7 @@ static void __cpuinit start_secondary(void *unused)
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
 		rep_nop();
-	setup_secondary_APIC_clock();
+	setup_secondary_clock();
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
 		enable_NMI_through_LVT0(NULL);
@@ -1331,7 +1331,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	smpboot_setup_io_apic();
 
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 
 	/*
 	 * Synchronize the TSC with the AP
@@ -232,6 +232,7 @@ EXPORT_SYMBOL(get_cmos_time);
 static void sync_cmos_clock(unsigned long dummy);
 
 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+int no_sync_cmos_clock;
 
 static void sync_cmos_clock(unsigned long dummy)
 {
@@ -275,6 +276,7 @@ static void sync_cmos_clock(unsigned long dummy)
 
 void notify_arch_cmos_timer(void)
 {
-	mod_timer(&sync_cmos_timer, jiffies + 1);
+	if (!no_sync_cmos_clock)
+		mod_timer(&sync_cmos_timer, jiffies + 1);
 }
@@ -23,6 +23,7 @@
  * an extra value to store the TSC freq
  */
 unsigned int tsc_khz;
+unsigned long long (*custom_sched_clock)(void);
 int tsc_disable;
@@ -107,6 +108,9 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long this_offset;
 
+	if (unlikely(custom_sched_clock))
+		return (*custom_sched_clock)();
+
 	/*
 	 * in the NUMA case we dont use the TSC as they are not
 	 * synchronized across all CPUs.
@@ -34,6 +34,7 @@
 #include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
+#include <asm/vmi_time.h>
 
 /* Convenient for calling VMI functions indirectly in the ROM */
 typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -67,6 +68,7 @@ struct {
 	void (*set_linear_mapping)(int, u32, u32, u32);
 	void (*flush_tlb)(int);
 	void (*set_initial_ap_state)(int, int);
+	void (*halt)(void);
 } vmi_ops;
 
 /* XXX move this to alternative.h */
@@ -252,6 +254,19 @@ static void vmi_nop(void)
 {
 }
 
+/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
+#ifdef CONFIG_NO_IDLE_HZ
+static fastcall void vmi_safe_halt(void)
+{
+	int idle = vmi_stop_hz_timer();
+	vmi_ops.halt();
+	if (idle) {
+		local_irq_disable();
+		vmi_account_time_restart_hz_timer();
+		local_irq_enable();
+	}
+}
+#endif
+
 #ifdef CONFIG_DEBUG_PAGE_TYPE
@@ -727,7 +742,12 @@ static inline int __init activate_vmi(void)
 		(char *)paravirt_ops.save_fl);
 	patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
 		(char *)paravirt_ops.irq_disable);
+#ifndef CONFIG_NO_IDLE_HZ
 	para_fill(safe_halt, Halt);
+#else
+	vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
+	paravirt_ops.safe_halt = vmi_safe_halt;
+#endif
 	para_fill(wbinvd, WBINVD);
 
 	/* paravirt_ops.read_msr = vmi_rdmsr */
 	/* paravirt_ops.write_msr = vmi_wrmsr */
@@ -837,6 +857,31 @@ static inline int __init activate_vmi(void)
 	paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
 #endif
 
+	/*
+	 * Check for VMI timer functionality by probing for a cycle frequency method
+	 */
+	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
+	if (rel->type != VMI_RELOCATION_NONE) {
+		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
+		vmi_timer_ops.get_cycle_counter =
+			vmi_get_function(VMI_CALL_GetCycleCounter);
+		vmi_timer_ops.get_wallclock =
+			vmi_get_function(VMI_CALL_GetWallclockTime);
+		vmi_timer_ops.wallclock_updated =
+			vmi_get_function(VMI_CALL_WallclockUpdated);
+		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
+		vmi_timer_ops.cancel_alarm =
+			vmi_get_function(VMI_CALL_CancelAlarm);
+		paravirt_ops.time_init = vmi_time_init;
+		paravirt_ops.get_wallclock = vmi_get_wallclock;
+		paravirt_ops.set_wallclock = vmi_set_wallclock;
+#ifdef CONFIG_X86_LOCAL_APIC
+		paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
+		paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
+#endif
+		custom_sched_clock = vmi_sched_clock;
+	}
+
 	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
 	 * to convert VMI_IRET to a call instead of a jump; so we have
[Diff collapsed: the new file vmitime.c, added to the Makefile above, is not expanded in this view.]
@@ -43,6 +43,8 @@ extern void generic_apic_probe(void);
 #define apic_write native_apic_write
 #define apic_write_atomic native_apic_write_atomic
 #define apic_read native_apic_read
+#define setup_boot_clock setup_boot_APIC_clock
+#define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 static __inline fastcall void native_apic_write(unsigned long reg,
@@ -121,6 +121,8 @@ struct paravirt_ops
 	void (fastcall *apic_write)(unsigned long reg, unsigned long v);
 	void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
 	unsigned long (fastcall *apic_read)(unsigned long reg);
+	void (*setup_boot_clock)(void);
+	void (*setup_secondary_clock)(void);
 #endif
 
 	void (fastcall *flush_tlb_user)(void);
@@ -323,6 +325,16 @@ static inline unsigned long apic_read(unsigned long reg)
 {
 	return paravirt_ops.apic_read(reg);
 }
+
+static inline void setup_boot_clock(void)
+{
+	paravirt_ops.setup_boot_clock();
+}
+
+static inline void setup_secondary_clock(void)
+{
+	paravirt_ops.setup_secondary_clock();
+}
 #endif
 
 #ifdef CONFIG_SMP
@@ -30,6 +30,7 @@ static inline int native_set_wallclock(unsigned long nowtime)
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
+extern unsigned long long native_sched_clock(void);
 #else /* !CONFIG_PARAVIRT */
 
 #define get_wallclock() native_get_wallclock()
@@ -9,6 +9,8 @@ void setup_pit_timer(void);
 extern int pit_latch_buggy;
 extern int timer_ack;
 extern int no_timer_check;
+extern unsigned long long (*custom_sched_clock)(void);
+extern int no_sync_cmos_clock;
 extern int recalibrate_cpu_khz(void);
 #endif
/*
* VMI Time wrappers
*
* Copyright (C) 2006, VMware, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to dhecht@vmware.com
*
*/
#ifndef __VMI_TIME_H
#define __VMI_TIME_H
/*
* Raw VMI call indices for timer functions
*/
#define VMI_CALL_GetCycleFrequency 66
#define VMI_CALL_GetCycleCounter 67
#define VMI_CALL_SetAlarm 68
#define VMI_CALL_CancelAlarm 69
#define VMI_CALL_GetWallclockTime 70
#define VMI_CALL_WallclockUpdated 71
/* Cached VMI timer operations */
extern struct vmi_timer_ops {
	u64 (*get_cycle_frequency)(void);
	u64 (*get_cycle_counter)(int);
	u64 (*get_wallclock)(void);
	int (*wallclock_updated)(void);
	void (*set_alarm)(u32 flags, u64 expiry, u64 period);
	void (*cancel_alarm)(u32 flags);
} vmi_timer_ops;
/* Prototypes */
extern void __init vmi_time_init(void);
extern unsigned long vmi_get_wallclock(void);
extern int vmi_set_wallclock(unsigned long now);
extern unsigned long long vmi_sched_clock(void);
#ifdef CONFIG_X86_LOCAL_APIC
extern void __init vmi_timer_setup_boot_alarm(void);
extern void __init vmi_timer_setup_secondary_alarm(void);
extern void apic_vmi_timer_interrupt(void);
#endif
#ifdef CONFIG_NO_IDLE_HZ
extern int vmi_stop_hz_timer(void);
extern void vmi_account_time_restart_hz_timer(void);
#endif
/*
* When run under a hypervisor, a vcpu is always in one of three states:
* running, halted, or ready. The vcpu is in the 'running' state if it
* is executing. When the vcpu executes the halt interface, the vcpu
* enters the 'halted' state and remains halted until there is some work
* pending for the vcpu (e.g. an alarm expires, host I/O completes on
* behalf of virtual I/O). At this point, the vcpu enters the 'ready'
* state (waiting for the hypervisor to reschedule it). Finally, at any
* time when the vcpu is not in the 'running' state nor the 'halted'
* state, it is in the 'ready' state.
*
* Real time is advances while the vcpu is 'running', 'ready', or
* 'halted'. Stolen time is the time in which the vcpu is in the
* 'ready' state. Available time is the remaining time -- the vcpu is
* either 'running' or 'halted'.
*
* All three views of time are accessible through the VMI cycle
* counters.
*/
/* The cycle counters. */
#define VMI_CYCLES_REAL 0
#define VMI_CYCLES_AVAILABLE 1
#define VMI_CYCLES_STOLEN 2
/* The alarm interface 'flags' bits */
#define VMI_ALARM_COUNTERS 2
#define VMI_ALARM_COUNTER_MASK 0x000000ff
#define VMI_ALARM_WIRED_IRQ0 0x00000000
#define VMI_ALARM_WIRED_LVTT 0x00010000
#define VMI_ALARM_IS_ONESHOT 0x00000000
#define VMI_ALARM_IS_PERIODIC 0x00000100
#define CONFIG_VMI_ALARM_HZ 100
#endif
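The stolen-time accounting described in the header's comment can be read straight off these counters. A hedged sketch follows: the helper and its bookkeeping are illustrative and not part of this patch; only vmi_timer_ops, the VMI_CYCLES_* indices, and do_div() come from real interfaces.

#include <linux/types.h>
#include <asm/div64.h>
#include <asm/vmi_time.h>

/* Illustrative helper: nanoseconds of stolen time accumulated since a
 * previously sampled value of the STOLEN cycle counter. */
static u64 stolen_delta_ns(u64 last_stolen_cycles)
{
	u64 cycles = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN)
			- last_stolen_cycles;
	u64 khz = vmi_timer_ops.get_cycle_frequency();

	/* ns = cycles * 1e9 / Hz = cycles * 1e6 / kHz. Naive scaling:
	 * the multiply can overflow for very large deltas, so a real
	 * implementation would precompute a multiplier and shift. */
	do_div(khz, 1000);		/* Hz -> kHz, fits in u32 */
	cycles *= 1000000;
	do_div(cycles, (u32)khz);
	return cycles;
}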
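Putting the alarm interface together: a hypothetical sketch of arming a periodic alarm wired to the local APIC LVTT at CONFIG_VMI_ALARM_HZ, roughly in the spirit of what the collapsed vmitime.c presumably does in vmi_timer_setup_boot_alarm(). The function names below are invented; the flags and operations come from the header above.

#include <linux/types.h>
#include <asm/div64.h>
#include <asm/vmi_time.h>

/* Hypothetical: fire CONFIG_VMI_ALARM_HZ times per second through the
 * local APIC timer, counting real time. The low byte of 'flags'
 * selects the cycle counter (see VMI_ALARM_COUNTER_MASK); higher bits
 * select the wiring and one-shot vs. periodic mode. */
static void example_arm_lvtt_alarm(void)
{
	u64 period = vmi_timer_ops.get_cycle_frequency();
	u64 now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);

	do_div(period, CONFIG_VMI_ALARM_HZ);	/* cycles per tick */
	vmi_timer_ops.set_alarm(VMI_ALARM_WIRED_LVTT |
				VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL,
				now + period, period);
}

/* Cancellation takes the same counter selector. */
static void example_cancel_lvtt_alarm(void)
{
	vmi_timer_ops.cancel_alarm(VMI_CYCLES_REAL);
}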