• Ingo Molnar's avatar
    perf stat: Count branches first · dd86e72a
    Ingo Molnar authored
    Count branches first, cache-misses second. The reason is that
    on x86, not every counter on every CPU is able to count branches.
    
    Before:
    
     Performance counter stats for 'ls':
    
           0.756653  task-clock-msecs         #      0.802 CPUs
                  0  context-switches         #      0.000 M/sec
                  0  CPU-migrations           #      0.000 M/sec
                250  page-faults              #      0.330 M/sec
            2375725  cycles                   #   3139.781 M/sec
            1628129  instructions             #      0.685 IPC
              19643  cache-references         #     25.960 M/sec
               4608  cache-misses             #      6.090 M/sec
             342532  branches                 #    452.694 M/sec
      <not counted>  branch-misses
    
        0.000943356  seconds time elapsed
    
    After:
    
     Performance counter stats for 'ls':
    
           1.056734  task-clock-msecs         #      0.859 CPUs
                  0  context-switches         #      0.000 M/sec
                  0  CPU-migrations           #      0.000 M/sec
                259  page-faults              #      0.245 M/sec
            3345932  cycles                   #   3166.295 M/sec
            3074090  instructions             #      0.919 IPC
             616928  branches                 #    583.806 M/sec
              39279  branch-misses            #      6.367 %
              21312  cache-references         #     20.168 M/sec
               3661  cache-misses             #      3.464 M/sec
    
        0.001230551  seconds time elapsed
    
    (also prettify the printout of branch misses, in case it's
     getting scaled.)
    
    Cc: Tim Blechmann <tim@klingt.org>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    LKML-Reference: <4ADC3975.8050109@klingt.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    ---
     tools/perf/builtin-stat.c |    2 ++
     1 files changed, 2 insertions(+), 0 deletions(-)
    
    diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
    index c373683..95a55ea 100644
    --- a/tools/perf/builtin-stat.c
    +++ b/tools/perf/builtin-stat.c
    @@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
       { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
       { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
       { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES	},
    
     };
    ---
     tools/perf/builtin-stat.c |   20 ++++++++++----------
     1 files changed, 10 insertions(+), 10 deletions(-)
    
    diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
    index 95a55ea..90e0a26 100644
    --- a/tools/perf/builtin-stat.c
    +++ b/tools/perf/builtin-stat.c
    @@ -50,17 +50,17 @@
    
     static struct perf_event_attr default_attrs[] = {
    
    -  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
    -  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
    -  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
    -  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},
    -
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    -  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES	},
    +  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
    +  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
    +  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
    +  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
    +
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES	},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES		},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
    +  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
    
     };
    dd86e72a
builtin-stat.c 12.5 KB