• Peter Zijlstra's avatar
    x86: sched: provide arch implementations using aperf/mperf · d65d153b
    Peter Zijlstra authored
    APERF/MPERF support for cpu_power.
    
    APERF/MPERF is arch defined to be a relative scale of work capacity
    per logical cpu, this is assumed to include SMT and Turbo mode.
    
    APERF/MPERF are specified to both reset to 0 when either counter
    wraps, which is highly inconvenient, since that'll give a blip when
    that happens. The manual specifies writing 0 to the counters after
    each read, but that's 1) too expensive, and 2) destroys the
    possibility of sharing these counters with other users, so we live
    with the blip - the other existing user does too.
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
    Cc: John Stultz <johnstul@us.ibm.com>
    Cc: Darren Hart <dvhltc@us.ibm.com>
    Cc: John Kacur <jkacur@redhat.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
    d65d153b
sched.c 1.24 KB
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/irqflags.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>

/* Per-cpu copy of the previous APERF/MPERF sample, used to form deltas. */
static DEFINE_PER_CPU(struct aperfmperf, old_aperfmperf);

/*
 * scale_aperfmperf - ratio of APERF to MPERF progress since the last call
 *
 * Takes a fresh sample via get_aperfmperf(), subtracts the sample stored
 * for this cpu by the previous call, and remembers the new sample for
 * next time.
 *
 * Returns the aperf delta divided by (mperf delta >> SCHED_LOAD_SHIFT),
 * or SCHED_LOAD_SCALE when the shifted mperf delta is zero (nothing to
 * divide by).
 */
static unsigned long scale_aperfmperf(void)
{
	struct aperfmperf cur, val, *old = &__get_cpu_var(old_aperfmperf);
	unsigned long ratio = SCHED_LOAD_SCALE;
	unsigned long flags;

	/*
	 * Sample with interrupts disabled - presumably so the counters are
	 * read back-to-back without an interrupt landing in between.
	 */
	local_irq_save(flags);
	get_aperfmperf(&val);
	local_irq_restore(flags);

	/* Delta against the previous sample, then store the new sample. */
	cur = val;
	cur.aperf -= old->aperf;
	cur.mperf -= old->mperf;
	*old = val;

	/*
	 * Pre-shift the divisor so the quotient comes out scaled by
	 * 1 << SCHED_LOAD_SHIFT.
	 *
	 * Use a full 64-by-64 division: div_u64() takes a 32-bit divisor,
	 * so a large mperf delta would be truncated, and a delta whose low
	 * 32 bits are zero would pass the check below yet divide by zero.
	 */
	cur.mperf >>= SCHED_LOAD_SHIFT;
	if (cur.mperf)
		ratio = div64_u64(cur.aperf, cur.mperf);

	return ratio;
}

/*
 * arch_scale_freq_power - x86 frequency scaling factor for @cpu
 *
 * When the boot cpu advertises APERF/MPERF, the factor comes from
 * scale_aperfmperf(); otherwise the generic default is used.
 */
unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
{
	if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) {
		/*
		 * No aperf/mperf: a cpufreq based estimate could be
		 * plugged in here; for now use the generic default.
		 */
		return default_scale_freq_power(sd, cpu);
	}

	/*
	 * aperf/mperf is evaluated at the cpu level since it accounts
	 * for things such as turbo mode, which matter for full cores.
	 */
	return scale_aperfmperf();
}

/*
 * arch_scale_smt_power - x86 SMT scaling factor for @cpu
 *
 * With APERF/MPERF the smt gain is already folded into the aperf/mperf
 * ratio, so return the neutral SCHED_LOAD_SCALE; without it, defer to
 * the generic default.
 */
unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
{
	return boot_cpu_has(X86_FEATURE_APERFMPERF)
		? SCHED_LOAD_SCALE
		: default_scale_smt_power(sd, cpu);
}