Commit dcbf77b9 authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
  sched: Fix SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL vs SD_WAKE_AFFINE
  sched: Stop buddies from hogging the system
  sched: Add new wakeup preemption mode: WAKEUP_RUNNING
  sched: Fix TASK_WAKING & loadaverage breakage
  sched: Disable wakeup balancing
  sched: Rename flags to wake_flags
  sched: Clean up the load_idx selection in select_task_rq_fair
  sched: Optimize cgroup vs wakeup a bit
  sched: x86: Name old_perf in a unique way
  sched: Implement a gentler fair-sleepers feature
  sched: Add SD_PREFER_LOCAL
  sched: Add a few SYNC hint knobs to play with
  sched: Fix sync wakeups again
  sched: Add WF_FORK
  sched: Rename sync arguments
  sched: Rename select_task_rq() argument
  sched: Feature to disable APERF/MPERF cpu_power
  x86: sched: Provide arch implementations using aperf/mperf
  x86: Add generic aperf/mperf code
  x86: Move APERF/MPERF into a X86_FEATURE
  ...

Fix up trivial conflict in arch/x86/include/asm/processor.h due to
nearby addition of amd_get_nb_id() declaration from the EDAC merge.
parents ca043a66 29cd8bae
@@ -61,12 +61,13 @@ void build_cpu_to_node_map(void);
 .cache_nice_tries = 2, \
 .busy_idx = 2, \
 .idle_idx = 1, \
-.newidle_idx = 2, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 .flags = SD_LOAD_BALANCE \
         | SD_BALANCE_NEWIDLE \
         | SD_BALANCE_EXEC \
+        | SD_BALANCE_FORK \
         | SD_WAKE_AFFINE, \
 .last_balance = jiffies, \
 .balance_interval = 1, \
@@ -85,14 +86,14 @@ void build_cpu_to_node_map(void);
 .cache_nice_tries = 2, \
 .busy_idx = 3, \
 .idle_idx = 2, \
-.newidle_idx = 2, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 .flags = SD_LOAD_BALANCE \
+        | SD_BALANCE_NEWIDLE \
         | SD_BALANCE_EXEC \
         | SD_BALANCE_FORK \
-        | SD_SERIALIZE \
-        | SD_WAKE_BALANCE, \
+        | SD_SERIALIZE, \
 .last_balance = jiffies, \
 .balance_interval = 64, \
 .nr_balance_failed = 0, \
......
@@ -48,7 +48,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
 .cache_nice_tries = 1, \
 .flags = SD_LOAD_BALANCE \
         | SD_BALANCE_EXEC \
-        | SD_WAKE_BALANCE, \
 .last_balance = jiffies, \
 .balance_interval = 1, \
 .nr_balance_failed = 0, \
......
@@ -57,14 +57,13 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 .cache_nice_tries = 1, \
 .busy_idx = 3, \
 .idle_idx = 1, \
-.newidle_idx = 2, \
-.wake_idx = 1, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
 .flags = SD_LOAD_BALANCE \
         | SD_BALANCE_EXEC \
+        | SD_BALANCE_FORK \
         | SD_BALANCE_NEWIDLE \
-        | SD_WAKE_IDLE \
-        | SD_SERIALIZE \
-        | SD_WAKE_BALANCE, \
+        | SD_SERIALIZE, \
 .last_balance = jiffies, \
 .balance_interval = 1, \
 .nr_balance_failed = 0, \
......
@@ -15,14 +15,14 @@
 .cache_nice_tries = 2, \
 .busy_idx = 3, \
 .idle_idx = 2, \
-.newidle_idx = 2, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 .flags = SD_LOAD_BALANCE \
         | SD_BALANCE_FORK \
         | SD_BALANCE_EXEC \
-        | SD_SERIALIZE \
-        | SD_WAKE_BALANCE, \
+        | SD_BALANCE_NEWIDLE \
+        | SD_SERIALIZE, \
 .last_balance = jiffies, \
 .balance_interval = 1, \
 .nr_balance_failed = 0, \
......
@@ -52,13 +52,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 .busy_idx = 3, \
 .idle_idx = 2, \
 .newidle_idx = 0, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 .flags = SD_LOAD_BALANCE \
         | SD_BALANCE_FORK \
         | SD_BALANCE_EXEC \
-        | SD_SERIALIZE \
-        | SD_WAKE_BALANCE, \
+        | SD_SERIALIZE, \
 .last_balance = jiffies, \
 .balance_interval = 1, \
 }
......
@@ -96,6 +96,7 @@
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
 #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */

 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
......
@@ -27,6 +27,7 @@ struct mm_struct;
 #include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
+#include <linux/math64.h>
 #include <linux/init.h>

 /*
@@ -1022,4 +1023,33 @@ extern int set_tsc_mode(unsigned int val);

 extern int amd_get_nb_id(int cpu);

+struct aperfmperf {
+        u64 aperf, mperf;
+};
+
+static inline void get_aperfmperf(struct aperfmperf *am)
+{
+        WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+        rdmsrl(MSR_IA32_APERF, am->aperf);
+        rdmsrl(MSR_IA32_MPERF, am->mperf);
+}
+
+#define APERFMPERF_SHIFT 10
+
+static inline
+unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+                                    struct aperfmperf *new)
+{
+        u64 aperf = new->aperf - old->aperf;
+        u64 mperf = new->mperf - old->mperf;
+        unsigned long ratio = aperf;
+
+        mperf >>= APERFMPERF_SHIFT;
+        if (mperf)
+                ratio = div64_u64(aperf, mperf);
+
+        return ratio;
+}
+
 #endif /* _ASM_X86_PROCESSOR_H */
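For reference, the value calc_aperfmperf_ratio() returns is delta-APERF over delta-MPERF as a 2^10 fixed-point number. A minimal userspace sketch of the same arithmetic (plain C, made-up counter values, no MSR access):

#include <stdint.h>
#include <stdio.h>

#define APERFMPERF_SHIFT 10

/* Same math as calc_aperfmperf_ratio(): delta aperf / delta mperf, 2^10 fixed point. */
static unsigned long ratio(uint64_t old_aperf, uint64_t old_mperf,
                           uint64_t new_aperf, uint64_t new_mperf)
{
        uint64_t aperf = new_aperf - old_aperf;
        uint64_t mperf = (new_mperf - old_mperf) >> APERFMPERF_SHIFT;

        /* Fall back to the raw aperf delta when mperf underflows to 0, as the kernel does. */
        return mperf ? (unsigned long)(aperf / mperf) : (unsigned long)aperf;
}

int main(void)
{
        /* Hypothetical samples: APERF advanced 1.5x as fast as MPERF (turbo). */
        unsigned long r = ratio(0, 0, 1500000, 1000000);

        printf("ratio = %lu (%.2fx nominal)\n", r, (double)r / (1 << APERFMPERF_SHIFT));
        return 0;
}

With these numbers it prints roughly 1536, i.e. 1.5 in 2^10 fixed point; a ratio below 1024 means the CPU averaged less than its nominal frequency over the sampling interval.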
@@ -116,15 +116,11 @@ extern unsigned long node_remap_size[];
 # define SD_CACHE_NICE_TRIES 1
 # define SD_IDLE_IDX 1
-# define SD_NEWIDLE_IDX 2
-# define SD_FORKEXEC_IDX 0
 #else
 # define SD_CACHE_NICE_TRIES 2
 # define SD_IDLE_IDX 2
-# define SD_NEWIDLE_IDX 2
-# define SD_FORKEXEC_IDX 1
 #endif
@@ -137,22 +133,20 @@ extern unsigned long node_remap_size[];
 .cache_nice_tries = SD_CACHE_NICE_TRIES, \
 .busy_idx = 3, \
 .idle_idx = SD_IDLE_IDX, \
-.newidle_idx = SD_NEWIDLE_IDX, \
-.wake_idx = 1, \
-.forkexec_idx = SD_FORKEXEC_IDX, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 \
 .flags = 1*SD_LOAD_BALANCE \
         | 1*SD_BALANCE_NEWIDLE \
         | 1*SD_BALANCE_EXEC \
         | 1*SD_BALANCE_FORK \
-        | 0*SD_WAKE_IDLE \
+        | 0*SD_BALANCE_WAKE \
         | 1*SD_WAKE_AFFINE \
-        | 1*SD_WAKE_BALANCE \
         | 0*SD_SHARE_CPUPOWER \
         | 0*SD_POWERSAVINGS_BALANCE \
         | 0*SD_SHARE_PKG_RESOURCES \
         | 1*SD_SERIALIZE \
-        | 1*SD_WAKE_IDLE_FAR \
         | 0*SD_PREFER_SIBLING \
         , \
 .last_balance = jiffies, \
......
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)
 obj-y := intel_cacheinfo.o addon_cpuid_features.o
 obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o
+obj-y += vmware.o hypervisor.o sched.o
 obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64) += bugs_64.o
......
@@ -60,7 +60,6 @@ enum {
 };

 #define INTEL_MSR_RANGE (0xffff)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)

 struct acpi_cpufreq_data {
         struct acpi_processor_performance *acpi_data;
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data {

 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);

-struct acpi_msr_data {
-        u64 saved_aperf, saved_mperf;
-};
-
-static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
+static DEFINE_PER_CPU(struct aperfmperf, old_perf);

 DEFINE_TRACE(power_mark);
@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask)
         return cmd.val;
 }

-struct perf_pair {
-        union {
-                struct {
-                        u32 lo;
-                        u32 hi;
-                } split;
-                u64 whole;
-        } aperf, mperf;
-};
-
 /* Called via smp_call_function_single(), on the target CPU */
 static void read_measured_perf_ctrs(void *_cur)
 {
-        struct perf_pair *cur = _cur;
+        struct aperfmperf *am = _cur;

-        rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
-        rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
+        get_aperfmperf(am);
 }

 /*
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur)
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
                                       unsigned int cpu)
 {
-        struct perf_pair readin, cur;
-        unsigned int perf_percent;
+        struct aperfmperf perf;
+        unsigned long ratio;
         unsigned int retval;

-        if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
+        if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
                 return 0;

-        cur.aperf.whole = readin.aperf.whole -
-                                per_cpu(msr_data, cpu).saved_aperf;
-        cur.mperf.whole = readin.mperf.whole -
-                                per_cpu(msr_data, cpu).saved_mperf;
-        per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
-        per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
-
-#ifdef __i386__
-        /*
-         * We dont want to do 64 bit divide with 32 bit kernel
-         * Get an approximate value. Return failure in case we cannot get
-         * an approximate value.
-         */
-        if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
-                int shift_count;
-                u32 h;
-
-                h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
-                shift_count = fls(h);
-
-                cur.aperf.whole >>= shift_count;
-                cur.mperf.whole >>= shift_count;
-        }
-
-        if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
-                int shift_count = 7;
-                cur.aperf.split.lo >>= shift_count;
-                cur.mperf.split.lo >>= shift_count;
-        }
-
-        if (cur.aperf.split.lo && cur.mperf.split.lo)
-                perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
-        else
-                perf_percent = 0;
-#else
-        if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
-                int shift_count = 7;
-                cur.aperf.whole >>= shift_count;
-                cur.mperf.whole >>= shift_count;
-        }
-
-        if (cur.aperf.whole && cur.mperf.whole)
-                perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
-        else
-                perf_percent = 0;
-#endif
-
-        retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
+        ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
+        per_cpu(old_perf, cpu) = perf;
+
+        retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;

         return retval;
 }
@@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
         acpi_processor_notify_smm(THIS_MODULE);

         /* Check for APERF/MPERF support in hardware */
-        if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
-                unsigned int ecx;
-                ecx = cpuid_ecx(6);
-                if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
-                        acpi_cpufreq_driver.getavg = get_measured_perf;
-        }
+        if (cpu_has(c, X86_FEATURE_APERFMPERF))
+                acpi_cpufreq_driver.getavg = get_measured_perf;

         dprintk("CPU%u - ACPI performance management activated.\n", cpu);
         for (i = 0; i < perf->state_count; i++)
......
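The rewritten get_measured_perf() turns that fixed-point ratio straight into an average-frequency estimate: retval = (max_freq * ratio) >> APERFMPERF_SHIFT. With made-up numbers, a policy whose cpuinfo.max_freq is 2,000,000 kHz and a measured ratio of 512 (the CPU averaged half speed since the last sample) gives (2000000 * 512) >> 10 = 1,000,000 kHz, replacing the old percent-based arithmetic and its #ifdef __i386__ approximation with a single 64-bit division helper.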
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
                 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
         }

+        if (c->cpuid_level > 6) {
+                unsigned ecx = cpuid_ecx(6);
+                if (ecx & 0x01)
+                        set_cpu_cap(c, X86_FEATURE_APERFMPERF);
+        }
+
         if (cpu_has_xmm2)
                 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
         if (cpu_has_ds) {
......
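The feature bit set above comes from CPUID leaf 6, ECX bit 0 -- the same bit the removed CPUID_6_ECX_APERFMPERF_CAPABILITY constant used to name. A rough userspace check of that bit using GCC's <cpuid.h> (illustration only, not kernel code):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 6: thermal and power management. ECX bit 0 => APERF/MPERF MSRs present. */
        if (__get_cpuid(6, &eax, &ebx, &ecx, &edx) && (ecx & 0x01))
                puts("APERF/MPERF reported by CPUID");
        else
                puts("APERF/MPERF not reported");
        return 0;
}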
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/irqflags.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>

#ifdef CONFIG_SMP

static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);

static unsigned long scale_aperfmperf(void)
{
        struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
        unsigned long ratio, flags;

        local_irq_save(flags);
        get_aperfmperf(&val);
        local_irq_restore(flags);

        ratio = calc_aperfmperf_ratio(old, &val);
        *old = val;

        return ratio;
}

unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
{
        /*
         * do aperf/mperf on the cpu level because it includes things
         * like turbo mode, which are relevant to full cores.
         */
        if (boot_cpu_has(X86_FEATURE_APERFMPERF))
                return scale_aperfmperf();

        /*
         * maybe have something cpufreq here
         */

        return default_scale_freq_power(sd, cpu);
}

unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
{
        /*
         * aperf/mperf already includes the smt gain
         */
        if (boot_cpu_has(X86_FEATURE_APERFMPERF))
                return SCHED_LOAD_SCALE;

        return default_scale_smt_power(sd, cpu);
}

#endif
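The return value slots straight into the scheduler's cpu_power accounting: SCHED_LOAD_SCALE is 1 << 10, the same scale as APERFMPERF_SHIFT (that equivalence is background knowledge, not shown in this diff), so a ratio of 1024 means "running at nominal speed" and a turbo-boosted core returning, say, 1229 advertises roughly 1.2x the capacity of a nominal one.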
@@ -190,6 +190,7 @@ extern unsigned long long time_sync_thresh;
 /* in tsk->state again */
 #define TASK_DEAD 64
 #define TASK_WAKEKILL 128
+#define TASK_WAKING 256

 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -802,14 +803,14 @@ enum cpu_idle_type {
 #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
 #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
 #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
-#define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */
+#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
 #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */
+#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */
 #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
 #define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
 #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
-#define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */
 #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */

 enum powersavings_balance_level {
@@ -991,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
         return 0;
 }

+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
+
 #else /* CONFIG_SMP */

 struct sched_domain_attr;
@@ -1002,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 }
 #endif /* !CONFIG_SMP */

 struct io_context; /* See blkdev.h */
@@ -1019,6 +1024,12 @@ struct uts_namespace;
 struct rq;
 struct sched_domain;

+/*
+ * wake flags
+ */
+#define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+#define WF_FORK 0x02 /* child wakeup after fork */
+
 struct sched_class {
         const struct sched_class *next;
@@ -1026,13 +1037,13 @@ struct sched_class {
         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
         void (*yield_task) (struct rq *rq);

-        void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
+        void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);

         struct task_struct * (*pick_next_task) (struct rq *rq);
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);

 #ifdef CONFIG_SMP
-        int (*select_task_rq)(struct task_struct *p, int sync);
+        int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);

         unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
                         struct rq *busiest, unsigned long max_load_move,
@@ -1102,6 +1113,8 @@ struct sched_entity {
         u64 start_runtime;
         u64 avg_wakeup;

+        u64 avg_running;
+
 #ifdef CONFIG_SCHEDSTATS
         u64 wait_start;
         u64 wait_max;
......
@@ -95,14 +95,12 @@ int arch_update_cpu_topology(void);
         | 1*SD_BALANCE_NEWIDLE \
         | 1*SD_BALANCE_EXEC \
         | 1*SD_BALANCE_FORK \
-        | 0*SD_WAKE_IDLE \
+        | 0*SD_BALANCE_WAKE \
         | 1*SD_WAKE_AFFINE \
-        | 1*SD_WAKE_BALANCE \
         | 1*SD_SHARE_CPUPOWER \
         | 0*SD_POWERSAVINGS_BALANCE \
         | 0*SD_SHARE_PKG_RESOURCES \
         | 0*SD_SERIALIZE \
-        | 0*SD_WAKE_IDLE_FAR \
         | 0*SD_PREFER_SIBLING \
         , \
 .last_balance = jiffies, \
@@ -122,20 +120,19 @@ int arch_update_cpu_topology(void);
 .imbalance_pct = 125, \
 .cache_nice_tries = 1, \
 .busy_idx = 2, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 \
 .flags = 1*SD_LOAD_BALANCE \
         | 1*SD_BALANCE_NEWIDLE \
         | 1*SD_BALANCE_EXEC \
         | 1*SD_BALANCE_FORK \
-        | 1*SD_WAKE_IDLE \
+        | 0*SD_BALANCE_WAKE \
         | 1*SD_WAKE_AFFINE \
-        | 1*SD_WAKE_BALANCE \
+        | 1*SD_PREFER_LOCAL \
         | 0*SD_SHARE_CPUPOWER \
         | 1*SD_SHARE_PKG_RESOURCES \
         | 0*SD_SERIALIZE \
-        | 0*SD_WAKE_IDLE_FAR \
         | sd_balance_for_mc_power() \
         | sd_power_saving_flags() \
         , \
@@ -155,21 +152,20 @@ int arch_update_cpu_topology(void);
 .cache_nice_tries = 1, \
 .busy_idx = 2, \
 .idle_idx = 1, \
-.newidle_idx = 2, \
-.wake_idx = 1, \
-.forkexec_idx = 1, \
+.newidle_idx = 0, \
+.wake_idx = 0, \
+.forkexec_idx = 0, \
 \
 .flags = 1*SD_LOAD_BALANCE \
         | 1*SD_BALANCE_NEWIDLE \
         | 1*SD_BALANCE_EXEC \
         | 1*SD_BALANCE_FORK \
-        | 1*SD_WAKE_IDLE \
-        | 0*SD_WAKE_AFFINE \
-        | 1*SD_WAKE_BALANCE \
+        | 0*SD_BALANCE_WAKE \
+        | 1*SD_WAKE_AFFINE \
+        | 1*SD_PREFER_LOCAL \
         | 0*SD_SHARE_CPUPOWER \
         | 0*SD_SHARE_PKG_RESOURCES \
         | 0*SD_SERIALIZE \
-        | 0*SD_WAKE_IDLE_FAR \
         | sd_balance_for_package_power() \
         | sd_power_saving_flags() \
         , \
@@ -191,14 +187,12 @@ int arch_update_cpu_topology(void);
         | 1*SD_BALANCE_NEWIDLE \
         | 0*SD_BALANCE_EXEC \
         | 0*SD_BALANCE_FORK \
-        | 0*SD_WAKE_IDLE \
-        | 1*SD_WAKE_AFFINE \
-        | 0*SD_WAKE_BALANCE \
+        | 0*SD_BALANCE_WAKE \
+        | 0*SD_WAKE_AFFINE \
         | 0*SD_SHARE_CPUPOWER \
         | 0*SD_POWERSAVINGS_BALANCE \
         | 0*SD_SHARE_PKG_RESOURCES \
         | 1*SD_SERIALIZE \
-        | 1*SD_WAKE_IDLE_FAR \
         | 0*SD_PREFER_SIBLING \
         , \
 .last_balance = jiffies, \
......
@@ -26,8 +26,8 @@
 #include <asm/current.h>

 typedef struct __wait_queue wait_queue_t;
-typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
+int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);

 struct __wait_queue {
         unsigned int flags;
......
@@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
         PN(se.sum_exec_runtime);
         PN(se.avg_overlap);
         PN(se.avg_wakeup);
+        PN(se.avg_running);

         nr_switches = p->nvcsw + p->nivcsw;
......
-SCHED_FEAT(NEW_FAIR_SLEEPERS, 0)
+/*
+ * Disregards a certain amount of sleep time (sched_latency_ns) and
+ * considers the task to be running during that period. This gives it
+ * a service deficit on wakeup, allowing it to run sooner.
+ */
+SCHED_FEAT(FAIR_SLEEPERS, 1)
+
+/*
+ * Only give sleepers 50% of their service deficit. This allows
+ * them to run sooner, but does not allow tons of sleepers to
+ * rip the spread apart.
+ */
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
+
+/*
+ * By not normalizing the sleep time, heavy tasks get an effective
+ * longer period, and lighter task an effective shorter period they
+ * are considered running.
+ */
 SCHED_FEAT(NORMALIZED_SLEEPER, 0)
-SCHED_FEAT(ADAPTIVE_GRAN, 1)
-SCHED_FEAT(WAKEUP_PREEMPT, 1)
+
+/*
+ * Place new tasks ahead so that they do not starve already running
+ * tasks
+ */
 SCHED_FEAT(START_DEBIT, 1)
+
+/*
+ * Should wakeups try to preempt running tasks.
+ */
+SCHED_FEAT(WAKEUP_PREEMPT, 1)
+
+/*
+ * Compute wakeup_gran based on task behaviour, clipped to
+ * [0, sched_wakeup_gran_ns]
+ */
+SCHED_FEAT(ADAPTIVE_GRAN, 1)
+
+/*
+ * When converting the wakeup granularity to virtual time, do it such
+ * that heavier tasks preempting a lighter task have an edge.
+ */
+SCHED_FEAT(ASYM_GRAN, 1)
+
+/*
+ * Always wakeup-preempt SYNC wakeups, see SYNC_WAKEUPS.
+ */
+SCHED_FEAT(WAKEUP_SYNC, 0)
+
+/*
+ * Wakeup preempt based on task behaviour. Tasks that do not overlap
+ * don't get preempted.
+ */
+SCHED_FEAT(WAKEUP_OVERLAP, 0)
+
+/*
+ * Wakeup preemption towards tasks that run short
+ */
+SCHED_FEAT(WAKEUP_RUNNING, 0)
+
+/*
+ * Use the SYNC wakeup hint, pipes and the likes use this to indicate
+ * the remote end is likely to consume the data we just wrote, and
+ * therefore has cache benefit from being placed on the same cpu, see
+ * also AFFINE_WAKEUPS.
+ */
+SCHED_FEAT(SYNC_WAKEUPS, 1)
+
+/*
+ * Based on load and program behaviour, see if it makes sense to place
+ * a newly woken task on the same cpu as the task that woke it --
+ * improve cache locality. Typically used with SYNC wakeups as
+ * generated by pipes and the like, see also SYNC_WAKEUPS.
+ */
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
+
+/*
+ * Weaken SYNC hint based on overlap
+ */
+SCHED_FEAT(SYNC_LESS, 1)
+
+/*
+ * Add SYNC hint based on overlap
+ */
+SCHED_FEAT(SYNC_MORE, 0)
+
+/*
+ * Prefer to schedule the task we woke last (assuming it failed
+ * wakeup-preemption), since its likely going to consume data we
+ * touched, increases cache locality.
+ */
+SCHED_FEAT(NEXT_BUDDY, 0)
+
+/*
+ * Prefer to schedule the task that ran last (when we did
+ * wake-preempt) as that likely will touch the same data, increases
+ * cache locality.
+ */
+SCHED_FEAT(LAST_BUDDY, 1)
+
+/*
+ * Consider buddies to be cache hot, decreases the likelyness of a
+ * cache buddy being migrated away, increases cache locality.
+ */
 SCHED_FEAT(CACHE_HOT_BUDDY, 1)
-SCHED_FEAT(SYNC_WAKEUPS, 1)
+
+/*
+ * Use arch dependent cpu power functions
+ */
+SCHED_FEAT(ARCH_POWER, 0)
+
 SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
-SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
-SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
+SCHED_FEAT(LB_SHARES_UPDATE, 1)
 SCHED_FEAT(ASYM_EFF_LOAD, 1)
-SCHED_FEAT(WAKEUP_OVERLAP, 0)
-SCHED_FEAT(LAST_BUDDY, 1)
+
+/*
+ * Spin-wait on mutex acquisition when the mutex owner is running on
+ * another cpu -- assumes that when the owner is running, it will soon
+ * release the lock. Decreases scheduling overhead.
+ */
 SCHED_FEAT(OWNER_SPIN, 1)
@@ -6,7 +6,7 @@
  */

 #ifdef CONFIG_SMP
-static int select_task_rq_idle(struct task_struct *p, int sync)
+static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
 {
         return task_cpu(p); /* IDLE tasks as never migrated */
 }
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
 /*
  * Idle tasks are unconditionally rescheduled:
  */
-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
 {
         resched_task(rq->idle);
 }
......
@@ -938,10 +938,13 @@ static void yield_task_rt(struct rq *rq)
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);

-static int select_task_rq_rt(struct task_struct *p, int sync)
+static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 {
         struct rq *rq = task_rq(p);

+        if (sd_flag != SD_BALANCE_WAKE)
+                return smp_processor_id();
+
         /*
          * If the current task is an RT task, then
          * try to see if we can wake this RT task up on another
@@ -999,7 +1002,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
 {
         if (p->prio < rq->curr->prio) {
                 resched_task(rq->curr);
......