/*
 * Performance counter x86 architecture code
 *
 *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/perf_counter.h>
#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;

static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
	struct perf_counter	*counters[X86_PMC_IDX_MAX];
	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const int intel_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
};

static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
			struct hw_perf_counter *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count, delta;

	/*
	 * Careful: an NMI might modify the previous counter value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->counter_base + idx, new_raw_count);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count, so we do that by clipping the delta to 32 bits:
	 */
	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &hwc->period_left);
}

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
	struct perf_counter_hw_event *hw_event = &counter->hw_event;
	struct hw_perf_counter *hwc = &counter->hw;

	if (unlikely(!perf_counters_initialized))
		return -EINVAL;

	/*
	 * Count user events, and generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * If privileged enough, count OS events too, and allow
	 * NMI events as well:
	 */
	hwc->nmi = 0;
	if (capable(CAP_SYS_ADMIN)) {
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
		if (hw_event->nmi)
			hwc->nmi = 1;
	}

	hwc->irq_period		= hw_event->irq_period;
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
		hwc->irq_period = 0x7FFFFFFF;

	atomic64_set(&hwc->period_left, hwc->irq_period);

	/*
	 * Raw event types provide the config in the event structure
	 */
	if (hw_event->raw) {
		hwc->config |= hw_event->type;
	} else {
		if (hw_event->type >= max_intel_perfmon_events)
			return -EINVAL;
		/*
		 * The generic map:
		 */
		hwc->config |= intel_perfmon_event_map[hw_event->type];
	}
	counter->wakeup_pending = 0;

	return 0;
}

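/*
 * Enable all counters on this CPU by setting all enable bits
 * (generic and fixed) in the global control MSR:
 */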
void hw_perf_enable_all(void)
{
	if (unlikely(!perf_counters_initialized))
		return;

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask);
}

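/*
 * Disable all counters globally and return the previous contents of
 * the global control MSR, so that the caller can restore them later:
 */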
u64 hw_perf_save_disable(void)
{
	u64 ctrl;

	if (unlikely(!perf_counters_initialized))
		return 0;

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	return ctrl;
}
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

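/*
 * Restore the global control MSR value that hw_perf_save_disable()
 * returned:
 */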
void hw_perf_restore(u64 ctrl)
{
	if (unlikely(!perf_counters_initialized))
		return;

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}
EXPORT_SYMBOL_GPL(hw_perf_restore);

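/*
 * Typical usage of the save/restore pair (illustrative sketch, not code
 * from this file): quiesce the PMU, touch counter state, then restore:
 *
 *	u64 ctrl;
 *
 *	ctrl = hw_perf_save_disable();
 *	... reprogram or read counters with the PMU disabled ...
 *	hw_perf_restore(ctrl);
 */
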
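/*
 * Disable a fixed-purpose counter by clearing its 4-bit control field
 * in MSR_ARCH_PERFMON_FIXED_CTR_CTRL:
 */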
static inline void
__pmc_fixed_disable(struct perf_counter *counter,
		    struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;
	int err;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

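/*
 * Disable a single counter: fixed counters go through the fixed control
 * MSR, generic counters through their EVENTSEL MSR:
 */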
static inline void
__pmc_generic_disable(struct perf_counter *counter,
			   struct hw_perf_counter *hwc, unsigned int idx)
{
	int err;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		return __pmc_fixed_disable(counter, hwc, idx);

	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
			     struct hw_perf_counter *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s32 period = hwc->irq_period;
	int err;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
	}

	per_cpu(prev_left[idx], smp_processor_id()) = left;

	/*
	 * The hw counter starts counting from this counter offset,
	 * mark it to be able to extract future deltas:
	 */
	atomic64_set(&hwc->prev_count, (u64)-left);

	err = checking_wrmsrl(hwc->counter_base + idx,
			     (u64)(-left) & counter_value_mask);
}

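/*
 * Enable a fixed-purpose counter by programming its 4-bit control field
 * in MSR_ARCH_PERFMON_FIXED_CTR_CTRL:
 */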
static inline void
__pmc_fixed_enable(struct perf_counter *counter,
		   struct hw_perf_counter *hwc, unsigned int __idx)
{
	int idx = __idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;
	int err;

	/*
	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
	 * and enable ring-0 counting if allowed:
	 */
	bits = 0x8ULL | 0x2ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

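/*
 * Enable a single counter: fixed counters go through the fixed control
 * MSR, generic counters through their EVENTSEL MSR:
 */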
static void
__pmc_generic_enable(struct perf_counter *counter,
			  struct hw_perf_counter *hwc, int idx)
{
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
		return __pmc_fixed_enable(counter, hwc, idx);

	wrmsr(hwc->config_base + idx,
	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
}

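/*
 * Map an event to a fixed-purpose counter index, or return -1 if the
 * event has to run on a generic counter:
 */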
static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
	unsigned int event;

	if (unlikely(hwc->nmi))
		return -1;

	event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS]))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES]))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES]))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx;

	idx = fixed_mode_idx(counter, hwc);
	if (idx >= 0) {
		/*
		 * Try to get the fixed counter, if that is already taken
		 * then try to get a generic counter:
		 */
		if (test_and_set_bit(idx, cpuc->used))
			goto try_generic;

		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		/*
		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
		 */
		hwc->counter_base =
			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
		hwc->idx = idx;
	} else {
		idx = hwc->idx;
		/* Try to get the previous generic counter again */
		if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
			if (idx == nr_counters_generic)
				return -EAGAIN;

			set_bit(idx, cpuc->used);
			hwc->idx = idx;
		}
		hwc->config_base  = MSR_ARCH_PERFMON_EVENTSEL0;
		hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
	}

	perf_counters_lapic_init(hwc->nmi);

	__pmc_generic_disable(counter, hwc, idx);

	cpuc->counters[idx] = counter;
	/*
	 * Make it visible before enabling the hw:
	 */
	smp_wmb();

	__hw_perf_counter_set_period(counter, hwc, idx);
	__pmc_generic_enable(counter, hwc, idx);

	return 0;
}

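/*
 * Dump the current PMU state (global control/status/overflow MSRs and
 * the per-counter control, count and period values) to the kernel log:
 */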
void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	int cpu, idx;

	if (!nr_counters_generic)
		return;

	local_irq_disable();

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
	rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

	printk(KERN_INFO "\n");
	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
	printk(KERN_INFO "CPU#%d: fixed:      %016llx\n", cpu, fixed);
	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);

	for (idx = 0; idx < nr_counters_generic; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);

		prev_left = per_cpu(prev_left[idx], cpu);

		printk(KERN_INFO "CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		printk(KERN_INFO "CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		printk(KERN_INFO "CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < nr_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_enable();
}

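/*
 * Free the PMC slot of a counter that is being scheduled out and fold
 * its remaining count into the generic counter value:
 */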
static void pmc_generic_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	unsigned int idx = hwc->idx;

	__pmc_generic_disable(counter, hwc, idx);

	clear_bit(idx, cpuc->used);
	cpuc->counters[idx] = NULL;
	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	smp_wmb();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
}

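/*
 * Append one u64 of IRQ data to the counter's irqdata buffer, or count
 * an overrun if the buffer is full:
 */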
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
	struct perf_data *irqdata = counter->irqdata;

	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
		irqdata->overrun++;
	} else {
		u64 *p = (u64 *) &irqdata->data[irqdata->len];

		*p = data;
		irqdata->len += sizeof(u64);
	}
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	x86_perf_counter_update(counter, hwc, idx);
	__hw_perf_counter_set_period(counter, hwc, idx);

	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
		__pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
	struct perf_counter *counter, *group_leader = sibling->group_leader;

	/*
	 * Store sibling timestamps (if any):
	 */
	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
		perf_store_irq_data(sibling, counter->hw_event.type);
		perf_store_irq_data(sibling, atomic64_read(&counter->count));
	}
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
	int bit, cpu = smp_processor_id();
	u64 ack, status, saved_global;
	struct cpu_hw_counters *cpuc;

	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);

	/* Disable counters globally */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	ack_APIC_irq();

	cpuc = &per_cpu(cpu_hw_counters, cpu);

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (!status)
		goto out;

again:
	ack = status;
	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		clear_bit(bit, (unsigned long *) &status);
		if (!counter)
			continue;

		perf_save_and_restart(counter);

		switch (counter->hw_event.record_type) {
		case PERF_RECORD_SIMPLE:
			continue;
		case PERF_RECORD_IRQ:
			perf_store_irq_data(counter, instruction_pointer(regs));
			break;
		case PERF_RECORD_GROUP:
			perf_handle_group(counter, &status, &ack);
			break;
		}
		/*
		 * From NMI context we cannot call into the scheduler to
		 * do a task wakeup - but we mark these generic counters as
		 * wakeup_pending and initiate a wakeup callback:
		 */
		if (nmi) {
			counter->wakeup_pending = 1;
			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
		} else {
			wake_up(&counter->waitq);
		}
	}

	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);

	/*
	 * Repeat if there is more work to be done:
	 */
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	if (status)
		goto again;
out:
	/*
	 * Restore - do not reenable when global enable is off:
	 */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
}

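/*
 * Entry point for the performance counter interrupt when it is raised
 * as a regular (non-NMI) local APIC interrupt:
 */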
void smp_perf_counter_interrupt(struct pt_regs *regs)
{
	irq_enter();
	inc_irq_stat(apic_perf_irqs);
	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	__smp_perf_counter_interrupt(regs, 0);

	irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
	struct cpu_hw_counters *cpuc;
	unsigned long flags;
	int bit, cpu;

	local_irq_save(flags);
	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
		struct perf_counter *counter = cpuc->counters[bit];

		if (!counter)
			continue;

		if (counter->wakeup_pending) {
			counter->wakeup_pending = 0;
			wake_up(&counter->waitq);
		}
	}

	local_irq_restore(flags);
}

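/*
 * Program the local APIC LVT entry for the performance counter
 * interrupt, either as a normal vector or as an NMI:
 */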
void __cpuinit perf_counters_lapic_init(int nmi)
{
	u32 apic_val;

	if (!perf_counters_initialized)
		return;
	/*
	 * Enable the performance counter vector in the APIC LVT:
	 */
	apic_val = apic_read(APIC_LVTERR);

	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
	if (nmi)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	else
		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
	apic_write(APIC_LVTERR, apic_val);
}

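/*
 * NMI die-notifier: hand performance counter NMIs to the common
 * interrupt handler:
 */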
static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	if (likely(cmd != DIE_NMI_IPI))
		return NOTIFY_DONE;

	regs = args->regs;

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	__smp_perf_counter_interrupt(regs, 1);

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler
};

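/*
 * Query CPUID leaf 0xA for the Architectural PerfMon parameters and
 * set up the generic and fixed counter bookkeeping:
 */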
void __init init_hw_perf_counters(void)
{
	union cpuid10_eax eax;
	unsigned int ebx;
	unsigned int unused;
	union cpuid10_edx edx;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired Event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return;

	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");

	printk(KERN_INFO "... version:         %d\n", eax.split.version_id);
	printk(KERN_INFO "... num counters:    %d\n", eax.split.num_counters);
	nr_counters_generic = eax.split.num_counters;
	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
		nr_counters_generic = X86_PMC_MAX_GENERIC;
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
			nr_counters_generic, X86_PMC_MAX_GENERIC);
	}
	perf_counter_mask = (1 << nr_counters_generic) - 1;
	perf_max_counters = nr_counters_generic;

	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
	printk(KERN_INFO "... value mask:      %016Lx\n", counter_value_mask);

	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);

	nr_counters_fixed = edx.split.num_counters_fixed;
	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
		nr_counters_fixed = X86_PMC_MAX_FIXED;
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
			nr_counters_fixed, X86_PMC_MAX_FIXED);
	}
	printk(KERN_INFO "... fixed counters:  %d\n", nr_counters_fixed);

	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	printk(KERN_INFO "... counter mask:    %016Lx\n", perf_counter_mask);
	perf_counters_initialized = true;

	perf_counters_lapic_init(0);
	register_die_notifier(&perf_counter_nmi_notifier);
}

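/*
 * ->read() method: fold the current hardware count into the generic
 * counter value:
 */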
static void pmc_generic_read(struct perf_counter *counter)
{
	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
	.enable		= pmc_generic_enable,
	.disable	= pmc_generic_disable,
	.read		= pmc_generic_read,
};

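/*
 * Validate and set up a counter's hardware state, then hand the x86
 * hw counter ops back to the core code:
 */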
const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
	int err;

	err = __hw_perf_counter_init(counter);
	if (err)
		return NULL;

	return &x86_perf_counter_ops;
}