Commit ab1831b0 authored by Linus Torvalds

Merge branch 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen

* 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen: try harder to balloon up under memory pressure.
  Xen balloon: fix totalram_pages counting.
  xen: explicitly create/destroy stop_machine workqueues outside suspend/resume region.
  xen: improve error handling in do_suspend.
  xen: don't leak IRQs over suspend/resume.
  xen: call clock resume notifier on all CPUs
  xen: use iret for return from 64b kernel to 32b usermode
  xen: don't call dpm_resume_noirq() with interrupts disabled.
  xen: register runstate info for boot CPU early
  xen: register runstate on secondary CPUs
  xen: register timer interrupt with IRQF_TIMER
  xen: correctly restore pfn_to_mfn_list_list after resume
  xen: restore runstate_info even if !have_vcpu_info_placement
  xen: re-register runstate area earlier on resume.
  xen: wait up to 5 minutes for device connection
  xen: improvement to wait_for_devices()
  xen: fix is_disconnected_device/exists_disconnected_device
  xen/xenbus: make DEVICE_ATTR()s static
parents eae6fa9b bc2c0303
@@ -138,24 +138,23 @@ static void xen_vcpu_setup(int cpu)
  */
 void xen_vcpu_restore(void)
 {
-	if (have_vcpu_info_placement) {
-		int cpu;
+	int cpu;

-		for_each_online_cpu(cpu) {
-			bool other_cpu = (cpu != smp_processor_id());
+	for_each_online_cpu(cpu) {
+		bool other_cpu = (cpu != smp_processor_id());

-			if (other_cpu &&
-			    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
-				BUG();
+		if (other_cpu &&
+		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+			BUG();

-			xen_vcpu_setup(cpu);
+		xen_setup_runstate_info(cpu);

-			if (other_cpu &&
-			    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
-				BUG();
-		}
+		if (have_vcpu_info_placement)
+			xen_vcpu_setup(cpu);

-		BUG_ON(!have_vcpu_info_placement);
+		if (other_cpu &&
+		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+			BUG();
 	}
 }
@@ -1180,6 +1179,8 @@ asmlinkage void __init xen_start_kernel(void)

 	xen_raw_console_write("about to get started...\n");

+	xen_setup_runstate_info(0);
+
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn)
 }

 /* Build the parallel p2m_top_mfn structures */
-static void __init xen_build_mfn_list_list(void)
+void xen_build_mfn_list_list(void)
 {
 	unsigned pfn, idx;
@@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 		(unsigned long)task_stack_page(idle) -
 		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
+	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_init_lock_cpu(cpu);
 #include <linux/types.h>
+#include <linux/clockchips.h>

 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -27,6 +28,8 @@ void xen_pre_suspend(void)

 void xen_post_suspend(int suspend_cancelled)
 {
+	xen_build_mfn_list_list();
+
 	xen_setup_shared_info();

 	if (suspend_cancelled) {
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled)
 	}
 }

+static void xen_vcpu_notify_restore(void *data)
+{
+	unsigned long reason = (unsigned long)data;
+
+	/* Boot processor notified via generic timekeeping_resume() */
+	if (smp_processor_id() == 0)
+		return;
+
+	clockevents_notify(reason, NULL);
+}
+
 void xen_arch_resume(void)
 {
-	/* nothing */
+	smp_call_function(xen_vcpu_notify_restore,
+			  (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
 }
@@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu)
 	return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
 }

-static void setup_runstate_info(int cpu)
+void xen_setup_runstate_info(int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;

@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu)
 		name = "<timer kasprintf failed>";

 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
-				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
 				      name, NULL);

 	evt = &per_cpu(xen_clock_events, cpu);
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu)
 	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
-
-	setup_runstate_info(cpu);
 }

 void xen_teardown_timer(int cpu)
@@ -494,6 +492,7 @@ __init void xen_time_init(void)

 	setup_force_cpu_cap(X86_FEATURE_TSC);

+	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
 }
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32)
 	pushq $__USER32_CS
 	pushq %rcx

-	pushq $VGCF_in_syscall
+	pushq $0
1:	jmp hypercall_iret
ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target)
 	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
-	pushq $VGCF_in_syscall
+	pushq $0
 	jmp hypercall_iret
ENDPROC(xen_syscall32_target)
ENDPROC(xen_sysenter_target)
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info;

 void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
+void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void);

 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
@@ -66,8 +66,6 @@ struct balloon_stats {
 	/* We aim for 'current allocation' == 'target allocation'. */
 	unsigned long current_pages;
 	unsigned long target_pages;
-	/* We may hit the hard limit in Xen. If we do then we remember it. */
-	unsigned long hard_limit;
 	/*
 	 * Drivers may alter the memory reservation independently, but they
 	 * must inform the balloon driver so we avoid hitting the hard limit.
@@ -136,6 +134,8 @@ static void balloon_append(struct page *page)
 		list_add(&page->lru, &ballooned_pages);
 		balloon_stats.balloon_low++;
 	}
+
+	totalram_pages--;
 }

 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -156,6 +156,8 @@ static struct page *balloon_retrieve(void)
 	else
 		balloon_stats.balloon_low--;

+	totalram_pages++;
+
 	return page;
 }

@@ -181,7 +183,7 @@ static void balloon_alarm(unsigned long unused)

 static unsigned long current_target(void)
 {
-	unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+	unsigned long target = balloon_stats.target_pages;

 	target = min(target,
 		     balloon_stats.current_pages +
@@ -217,23 +219,10 @@ static int increase_reservation(unsigned long nr_pages)
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-	if (rc < nr_pages) {
-		if (rc > 0) {
-			int ret;
-
-			/* We hit the Xen hard limit: reprobe. */
-			reservation.nr_extents = rc;
-			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-					&reservation);
-			BUG_ON(ret != rc);
-		}
-		if (rc >= 0)
-			balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
-						    balloon_stats.driver_pages);
-		goto out;
-	}
+	if (rc < 0)
+		goto out;

-	for (i = 0; i < nr_pages; i++) {
+	for (i = 0; i < rc; i++) {
 		page = balloon_retrieve();
 		BUG_ON(page == NULL);

@@ -259,13 +248,12 @@ static int increase_reservation(unsigned long nr_pages)
 		__free_page(page);
 	}

-	balloon_stats.current_pages += nr_pages;
-	totalram_pages = balloon_stats.current_pages;
+	balloon_stats.current_pages += rc;

  out:
 	spin_unlock_irqrestore(&balloon_lock, flags);

-	return 0;
+	return rc < 0 ? rc : rc != nr_pages;
 }

 static int decrease_reservation(unsigned long nr_pages)
@@ -323,7 +311,6 @@ static int decrease_reservation(unsigned long nr_pages)
 	BUG_ON(ret != nr_pages);

 	balloon_stats.current_pages -= nr_pages;
-	totalram_pages = balloon_stats.current_pages;

 	spin_unlock_irqrestore(&balloon_lock, flags);

@@ -367,7 +354,6 @@ static void balloon_process(struct work_struct *work)
 static void balloon_set_new_target(unsigned long target)
 {
 	/* No need for lock. Not read-modify-write updates. */
-	balloon_stats.hard_limit = ~0UL;
 	balloon_stats.target_pages = target;
 	schedule_work(&balloon_worker);
 }
@@ -422,12 +408,10 @@ static int __init balloon_init(void)
 	pr_info("xen_balloon: Initialising balloon driver.\n");

 	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-	totalram_pages = balloon_stats.current_pages;
 	balloon_stats.target_pages = balloon_stats.current_pages;
 	balloon_stats.balloon_low = 0;
 	balloon_stats.balloon_high = 0;
 	balloon_stats.driver_pages = 0UL;
-	balloon_stats.hard_limit = ~0UL;

 	init_timer(&balloon_timer);
 	balloon_timer.data = 0;
@@ -472,9 +456,6 @@ module_exit(balloon_exit);
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
-BALLOON_SHOW(hard_limit_kb,
-	     (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
-	     (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));

 static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
@@ -544,7 +525,6 @@ static struct attribute *balloon_info_attrs[] = {
 	&attr_current_kb.attr,
 	&attr_low_kb.attr,
 	&attr_high_kb.attr,
-	&attr_hard_limit_kb.attr,
 	&attr_driver_kb.attr,
 	NULL
 };
@@ -474,6 +474,9 @@ static void unbind_from_irq(unsigned int irq)
 		bind_evtchn_to_cpu(evtchn, 0);

 		evtchn_to_irq[evtchn] = -1;
+	}
+
+	if (irq_info[irq].type != IRQT_UNBOUND) {
 		irq_info[irq] = mk_unbound_info();

 		dynamic_irq_cleanup(irq);
@@ -43,7 +43,6 @@ static int xen_suspend(void *data)
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		dpm_resume_noirq(PMSG_RESUME);
 		return err;
 	}

@@ -69,7 +68,6 @@ static int xen_suspend(void *data)
 	}

 	sysdev_resume();
-	dpm_resume_noirq(PMSG_RESUME);

 	return 0;
 }
@@ -81,6 +79,12 @@ static void do_suspend(void)

 	shutting_down = SHUTDOWN_SUSPEND;

+	err = stop_machine_create();
+	if (err) {
+		printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
+		goto out;
+	}
+
 #ifdef CONFIG_PREEMPT
 	/* If the kernel is preemptible, we need to freeze all the processes
 	   to prevent them from being in the middle of a pagetable update
@@ -88,29 +92,32 @@ static void do_suspend(void)
 	err = freeze_processes();
 	if (err) {
 		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-		return;
+		goto out_destroy_sm;
 	}
 #endif

 	err = dpm_suspend_start(PMSG_SUSPEND);
 	if (err) {
 		printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
-		goto out;
+		goto out_thaw;
 	}

-	printk(KERN_DEBUG "suspending xenstore...\n");
-	xs_suspend();
-
 	err = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
 		printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
-		goto resume_devices;
+		goto out_resume;
 	}

+	printk(KERN_DEBUG "suspending xenstore...\n");
+	xs_suspend();
+
 	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+
+	dpm_resume_noirq(PMSG_RESUME);
+
 	if (err) {
 		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
-		goto out;
+		cancelled = 1;
 	}

 	if (!cancelled) {
@@ -119,17 +126,21 @@ static void do_suspend(void)
 	} else
 		xs_suspend_cancel();

-	dpm_resume_noirq(PMSG_RESUME);
-
-resume_devices:
+out_resume:
 	dpm_resume_end(PMSG_RESUME);

 	/* Make sure timer events get retriggered on all CPUs */
 	clock_was_set();
-out:
+
+out_thaw:
 #ifdef CONFIG_PREEMPT
 	thaw_processes();
+
+out_destroy_sm:
 #endif
+	stop_machine_destroy();
+
+out:
 	shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
@@ -454,21 +454,21 @@ static ssize_t xendev_show_nodename(struct device *dev,
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
-DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);

 static ssize_t xendev_show_devtype(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
-DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);

 static ssize_t xendev_show_modalias(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
 }
-DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);

 int xenbus_probe_node(struct xen_bus_type *bus,
 		      const char *type,
MODULE_LICENSE("GPL");
static int is_disconnected_device(struct device *dev, void *data)
static int is_device_connecting(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct device_driver *drv = data;
......@@ -861,14 +861,15 @@ static int is_disconnected_device(struct device *dev, void *data)
return 0;
xendrv = to_xenbus_driver(dev->driver);
return (xendev->state != XenbusStateConnected ||
(xendrv->is_ready && !xendrv->is_ready(xendev)));
return (xendev->state < XenbusStateConnected ||
(xendev->state == XenbusStateConnected &&
xendrv->is_ready && !xendrv->is_ready(xendev)));
}
static int exists_disconnected_device(struct device_driver *drv)
static int exists_connecting_device(struct device_driver *drv)
{
return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
is_disconnected_device);
is_device_connecting);
}
static int print_device_status(struct device *dev, void *data)
@@ -884,10 +885,13 @@ static int print_device_status(struct device *dev, void *data)
 		/* Information only: is this too noisy? */
 		printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
 		       xendev->nodename);
-	} else if (xendev->state != XenbusStateConnected) {
+	} else if (xendev->state < XenbusStateConnected) {
+		enum xenbus_state rstate = XenbusStateUnknown;
+		if (xendev->otherend)
+			rstate = xenbus_read_driver_state(xendev->otherend);
 		printk(KERN_WARNING "XENBUS: Timeout connecting "
-		       "to device: %s (state %d)\n",
-		       xendev->nodename, xendev->state);
+		       "to device: %s (local state %d, remote state %d)\n",
+		       xendev->nodename, xendev->state, rstate);
 	}

 	return 0;
@@ -897,7 +901,7 @@ static int print_device_status(struct device *dev, void *data)
 static int ready_to_wait_for_devices;

 /*
- * On a 10 second timeout, wait for all devices currently configured. We need
+ * On a 5-minute timeout, wait for all devices currently configured. We need
 * to do this to guarantee that the filesystems and / or network devices
 * needed for boot are available, before we can allow the boot to proceed.
 *
@@ -912,18 +916,30 @@ static int ready_to_wait_for_devices;
 */
 static void wait_for_devices(struct xenbus_driver *xendrv)
 {
-	unsigned long timeout = jiffies + 10*HZ;
+	unsigned long start = jiffies;
 	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+	unsigned int seconds_waited = 0;

 	if (!ready_to_wait_for_devices || !xen_domain())
 		return;

-	while (exists_disconnected_device(drv)) {
-		if (time_after(jiffies, timeout))
-			break;
+	while (exists_connecting_device(drv)) {
+		if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
+			if (!seconds_waited)
+				printk(KERN_WARNING "XENBUS: Waiting for "
+				       "devices to initialise: ");
+			seconds_waited += 5;
+			printk("%us...", 300 - seconds_waited);
+			if (seconds_waited == 300)
+				break;
+		}
+
 		schedule_timeout_interruptible(HZ/10);
 	}

+	if (seconds_waited)
+		printk("\n");
+
 	bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
 			 print_device_status);
 }