Discussion:
[PATCH 4/8] perf, tools: Only print base source file for srcline
Andi Kleen
2014-09-26 23:37:12 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

For perf report with --sort srcline only print the base source file
name. This makes the results generally fit much better to the
screen. The path is usually not that useful anyways because it is
often from different systems.

Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/util/srcline.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index c6a7cdc..ac877f9 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -274,7 +274,7 @@ char *get_srcline(struct dso *dso, unsigned long addr)
if (!addr2line(dso_name, addr, &file, &line, dso))
goto out;

- if (asprintf(&srcline, "%s:%u", file, line) < 0) {
+ if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) {
free(file);
goto out;
}
--
1.9.3
Andi Kleen
2014-09-26 23:37:14 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

When the source line is not found fall back to sym + offset.
This is generally much more useful than a raw address.
For this we need to pass in the symbol from the caller.
For some callers it's awkward to compute, so we stay
at the old behaviour.

Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/util/annotate.c | 2 +-
tools/perf/util/callchain.c | 3 ++-
tools/perf/util/map.c | 2 +-
tools/perf/util/sort.c | 6 ++++--
tools/perf/util/srcline.c | 12 +++++++++---
tools/perf/util/util.h | 4 +++-
6 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f49f9a5..ccd6f3a 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1190,7 +1190,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
goto next;

offset = start + i;
- src_line->path = get_srcline(map->dso, offset);
+ src_line->path = get_srcline(map->dso, offset, NULL, false);
insert_source_line(&tmp_root, src_line);

next:
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index b7ee210..8c0cea0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -666,7 +666,8 @@ char *callchain_list__sym_name(struct callchain_list *cl,
cl->ms.map && !cl->srcline)
cl->srcline = get_srcline(cl->ms.map->dso,
map__rip_2objdump(cl->ms.map,
- cl->ip));
+ cl->ip),
+ cl->ms.sym, false);
if (cl->srcline)
printed = scnprintf(bf, bfsize, "%s %s",
cl->ms.sym->name, cl->srcline);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b709059..4f3cdbc 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -360,7 +360,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,

if (map && map->dso) {
srcline = get_srcline(map->dso,
- map__rip_2objdump(map, addr));
+ map__rip_2objdump(map, addr), NULL, true);
if (srcline != SRCLINE_UNKNOWN)
ret = fprintf(fp, "%s%s", prefix, srcline);
free_srcline(srcline);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 289df9d..505700e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -291,7 +291,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
else {
struct map *map = left->ms.map;
left->srcline = get_srcline(map->dso,
- map__rip_2objdump(map, left->ip));
+ map__rip_2objdump(map, left->ip),
+ left->ms.sym, true);
}
}
if (!right->srcline) {
@@ -300,7 +301,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
else {
struct map *map = right->ms.map;
right->srcline = get_srcline(map->dso,
- map__rip_2objdump(map, right->ip));
+ map__rip_2objdump(map, right->ip),
+ right->ms.sym, true);
}
}
return strcmp(right->srcline, left->srcline);
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index ac877f9..36a7aff 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -8,12 +8,13 @@
#include "util/util.h"
#include "util/debug.h"

+#include "symbol.h"
+
#ifdef HAVE_LIBBFD_SUPPORT

/*
* Implement addr2line using libbfd.
*/
-#define PACKAGE "perf"
#include <bfd.h>

struct a2l_data {
@@ -250,7 +251,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused)
*/
#define A2L_FAIL_LIMIT 123

-char *get_srcline(struct dso *dso, unsigned long addr)
+char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+ bool show_sym)
{
char *file = NULL;
unsigned line = 0;
@@ -289,7 +291,11 @@ out:
dso->has_srcline = 0;
dso__free_a2l(dso);
}
- if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
+ if (sym) {
+ if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "",
+ addr - sym->start) < 0)
+ return SRCLINE_UNKNOWN;
+ } else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
return SRCLINE_UNKNOWN;
return srcline;
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 80bfdaa..9855c4f 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -338,8 +338,10 @@ static inline int path__join3(char *bf, size_t size,
}

struct dso;
+struct symbol;

-char *get_srcline(struct dso *dso, unsigned long addr);
+char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+ bool show_sym);
void free_srcline(char *srcline);

int filename__read_int(const char *filename, int *value);
--
1.9.3
Andi Kleen
2014-09-26 23:37:13 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

With srcline key/sort'ing it's useful to have line numbers
in the annotate window. This patch implements this.

Use objdump -l to request the line numbers and
save them in the line structure. Then the browser
displays them for source lines.

The line numbers are not displayed by default, but can be
toggled on with 'k'

There is one unfortunate problem with this setup. For
lines not containing source and which are outside functions
objdump -l reports line numbers off by a few: it always reports
the first line number in the next function even for lines
that are outside the function.
I haven't found a nice way to detect/correct this. Probably objdump
has to be fixed.
See https://sourceware.org/bugzilla/show_bug.cgi?id=16433

The line numbers are still useful even with these problems,
as most are correct and the ones which are not are nearby.

Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/ui/browsers/annotate.c | 13 ++++++++++++-
tools/perf/util/annotate.c | 30 +++++++++++++++++++++++++-----
tools/perf/util/annotate.h | 1 +
3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index f0697a3..8df6787 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -27,6 +27,7 @@ static struct annotate_browser_opt {
bool hide_src_code,
use_offset,
jump_arrows,
+ show_linenr,
show_nr_jumps;
} annotate_browser__opts = {
.use_offset = true,
@@ -128,7 +129,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
if (!*dl->line)
slsmg_write_nstring(" ", width - pcnt_width);
else if (dl->offset == -1) {
- printed = scnprintf(bf, sizeof(bf), "%*s ",
+ if (dl->line_nr && annotate_browser__opts.show_linenr)
+ printed = scnprintf(bf, sizeof(bf), "%*s %-5d ",
+ ab->addr_width, " ", dl->line_nr);
+ else
+ printed = scnprintf(bf, sizeof(bf), "%*s ",
ab->addr_width, " ");
slsmg_write_nstring(bf, printed);
slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1);
@@ -733,6 +738,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"o Toggle disassembler output/simplified view\n"
"s Toggle source code view\n"
"/ Search string\n"
+ "k Toggle line numbers\n"
"r Run available scripts\n"
"? Search string backwards\n");
continue;
@@ -741,6 +747,10 @@ static int annotate_browser__run(struct annotate_browser *browser,
script_browse(NULL);
continue;
}
+ case 'k':
+ annotate_browser__opts.show_linenr =
+ !annotate_browser__opts.show_linenr;
+ break;
case 'H':
nd = browser->curr_hot;
break;
@@ -984,6 +994,7 @@ static struct annotate_config {
} annotate__configs[] = {
ANNOTATE_CFG(hide_src_code),
ANNOTATE_CFG(jump_arrows),
+ ANNOTATE_CFG(show_linenr),
ANNOTATE_CFG(show_nr_jumps),
ANNOTATE_CFG(use_offset),
};
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3643752..f49f9a5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,11 +17,13 @@
#include "debug.h"
#include "annotate.h"
#include "evsel.h"
+#include <regex.h>
#include <pthread.h>
#include <linux/bitops.h>

const char *disassembler_style;
const char *objdump_path;
+static regex_t file_lineno;

static struct ins *ins__find(const char *name);
static int disasm_line__parse(char *line, char **namep, char **rawp);
@@ -570,13 +572,15 @@ out_free_name:
return -1;
}

-static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize)
+static struct disasm_line *disasm_line__new(s64 offset, char *line,
+ size_t privsize, int line_nr)
{
struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);

if (dl != NULL) {
dl->offset = offset;
dl->line = strdup(line);
+ dl->line_nr = line_nr;
if (dl->line == NULL)
goto out_delete;

@@ -788,13 +792,15 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
* The ops.raw part will be parsed further according to type of the instruction.
*/
static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
- FILE *file, size_t privsize)
+ FILE *file, size_t privsize,
+ int *line_nr)
{
struct annotation *notes = symbol__annotation(sym);
struct disasm_line *dl;
char *line = NULL, *parsed_line, *tmp, *tmp2, *c;
size_t line_len;
s64 line_ip, offset = -1;
+ regmatch_t match[2];

if (getline(&line, &line_len, file) < 0)
return -1;
@@ -812,6 +818,12 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
line_ip = -1;
parsed_line = line;

+ /* /filename:linenr ? Save line number and ignore. */
+ if (regexec(&file_lineno, line, 2, match, 0) == 0) {
+ *line_nr = atoi(line + match[1].rm_so);
+ return 0;
+ }
+
/*
* Strip leading spaces:
*/
@@ -842,8 +854,9 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
parsed_line = tmp2 + 1;
}

- dl = disasm_line__new(offset, parsed_line, privsize);
+ dl = disasm_line__new(offset, parsed_line, privsize, *line_nr);
free(line);
+ (*line_nr)++;

if (dl == NULL)
return -1;
@@ -869,6 +882,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
return 0;
}

+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+ regcomp(&file_lineno, "^/[^:]+:([0-9]+)$", REG_EXTENDED);
+}
+
static void delete_last_nop(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
@@ -904,6 +922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
char symfs_filename[PATH_MAX];
struct kcore_extract kce;
bool delete_extract = false;
+ int lineno = 0;

if (filename)
symbol__join_symfs(symfs_filename, filename);
@@ -982,7 +1001,7 @@ fallback:
snprintf(command, sizeof(command),
"%s %s%s --start-address=0x%016" PRIx64
" --stop-address=0x%016" PRIx64
- " -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
+ " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
objdump_path ? objdump_path : "objdump",
disassembler_style ? "-M " : "",
disassembler_style ? disassembler_style : "",
@@ -999,7 +1018,8 @@ fallback:
goto out_free_filename;

while (!feof(file))
- if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
+ if (symbol__parse_objdump_line(sym, map, file, privsize,
+ &lineno) < 0)
break;

/*
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 112d6e2..0784a94 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -58,6 +58,7 @@ struct disasm_line {
char *line;
char *name;
struct ins *ins;
+ int line_nr;
struct ins_operands ops;
};
--
1.9.3
Jiri Olsa
2014-10-20 18:06:05 UTC
Permalink
Post by Andi Kleen
=20
With srcline key/sort'ing it's useful to have line numbers
in the annotate window. This patch implements this.
=20
Use objdump -l to request the line numbers and
save them in the line structure. Then the browser
displays them for source lines.
=20
The line numbers are not displayed by default, but can be
toggled on with 'k'
=20
There is one unfortunate problem with this setup. For
lines not containing source and which are outside functions
objdump -l reports line numbers off by a few: it always reports
the first line number in the next function even for lines
that are outside the function.
I haven't found a nice way to detect/correct this. Probably objdump
has to be fixed.
See https://sourceware.org/bugzilla/show_bug.cgi?id=3D16433
seems like nice feature, anyway could you format the
line number into separated column?

currently without lines:


=E2=94=82 /* Receive them all */ =
=
=E2=96=92
=E2=94=82 for (i =3D 0; i < ctx->num_packets; i++) {=
=
=E2=96=92
0.02 =E2=94=82 mov (%r14),%eax =
=
=E2=96=92
0.02 =E2=94=82 xor %r13d,%r13d =
=
=E2=96=92
=E2=94=82 char data[DATASIZE]; =
=
=E2=96=92
=E2=94=82 int ret, done =3D 0; =
=
=E2=96=92
=E2=94=82 =
=
=E2=96=92
=E2=94=82 again: =
=
=E2=96=92
=E2=94=82 ret =3D read(ctx->in_fds[0], data =
+ done, DATASIZE - done); =
=E2=96=92
=E2=94=82 mov $0x64,%r12d =
=
=E2=96=92
=E2=94=82 =
=
=E2=96=92
=E2=94=82 /* Wait for start... */ =
=
=E2=96=92
=E2=94=82 ready(ctx->ready_out, ctx->wakefd); =
=
=E2=96=92


with 'k' pressed:


=E2=94=82 126 /* Receive them all */ =
=
=E2=96=92
=E2=94=82 127 for (i =3D 0; i < ctx->num_packets; i=
++) { =
=E2=96=92
0.02 =E2=94=82 mov (%r14),%eax =
=
=E2=96=92
0.02 =E2=94=82 xor %r13d,%r13d =
=
=E2=96=92
=E2=94=82 127 char data[DATASIZE]; =
=
=E2=96=92
=E2=94=82 128 int ret, done =3D 0; =
=
=E2=96=92
=E2=94=82 =
=
=E2=96=92
=E2=94=82 130 again: =
=
=E2=96=92
=E2=94=82 131 ret =3D read(ctx->in_fds[0], =
data + done, DATASIZE - done); =
=E2=96=92
=E2=94=82 mov $0x64,%r12d =
=
=E2=96=92
=E2=94=82 =
=
=E2=96=92
=E2=94=82 123 /* Wait for start... */ =
=
=E2=96=92
=E2=94=82 124 ready(ctx->ready_out, ctx->wakefd); =
=
=E2=96=92


putting line numbers close to the left line
and making the source line not to be shifted
right by line number width would be great


SNIP
Post by Andi Kleen
slsmg_write_nstring(" ", width - pcnt_width);
else if (dl->offset =3D=3D -1) {
- printed =3D scnprintf(bf, sizeof(bf), "%*s ",
+ if (dl->line_nr && annotate_browser__opts.show_linenr)
+ printed =3D scnprintf(bf, sizeof(bf), "%*s %-5d ",
+ ab->addr_width, " ", dl->line_nr);
+ else
+ printed =3D scnprintf(bf, sizeof(bf), "%*s ",
ab->addr_width, " ");
slsmg_write_nstring(bf, printed);
slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1);
@@ -733,6 +738,7 @@ static int annotate_browser__run(struct annotate_=
browser *browser,
Post by Andi Kleen
"o Toggle disassembler output/simplified view\n"
"s Toggle source code view\n"
"/ Search string\n"
+ "k Toggle line numbers\n"
extra tab --------^^^^^^ making the text misaligned in the help box


thanks,
jirka
Jiri Olsa
2014-10-20 18:17:55 UTC
Permalink
On Mon, Oct 20, 2014 at 08:06:05PM +0200, Jiri Olsa wrote:

SNIP
Post by Jiri Olsa
Post by Andi Kleen
+ if (dl->line_nr && annotate_browser__opts.show_linenr)
+ printed = scnprintf(bf, sizeof(bf), "%*s %-5d ",
+ ab->addr_width, " ", dl->line_nr);
+ else
+ printed = scnprintf(bf, sizeof(bf), "%*s ",
ab->addr_width, " ");
slsmg_write_nstring(bf, printed);
slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1);
@@ -733,6 +738,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"o Toggle disassembler output/simplified view\n"
"s Toggle source code view\n"
"/ Search string\n"
+ "k Toggle line numbers\n"
extra tab --------^^^^^^ making the text misaligned in the help box
also by pressing 'k' the current line jumps to
the hottest line, which does not seem right

jirka
Andi Kleen
2014-09-26 23:37:15 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

When -v is specified always print the hex address for the srcline.

Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/util/srcline.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 36a7aff..a22be7c 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -258,6 +258,12 @@ char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
unsigned line = 0;
char *srcline;
const char *dso_name;
+ char astr[50];
+
+ if (verbose)
+ snprintf(astr, sizeof astr, " %#lx", addr);
+ else
+ astr[0] = 0;

if (!dso->has_srcline)
goto out;
@@ -276,7 +282,12 @@ char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
if (!addr2line(dso_name, addr, &file, &line, dso))
goto out;

- if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) {
+ if (line == 0) {
+ free(file);
+ goto fallback;
+ }
+
+ if (asprintf(&srcline, "%s:%u%s", basename(file), line, astr) < 0) {
free(file);
goto out;
}
@@ -291,9 +302,10 @@ out:
dso->has_srcline = 0;
dso__free_a2l(dso);
}
+fallback:
if (sym) {
- if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "",
- addr - sym->start) < 0)
+ if (asprintf(&srcline, "%s+%ld%s", show_sym ? sym->name : "",
+ addr - sym->start, astr) < 0)
return SRCLINE_UNKNOWN;
} else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
return SRCLINE_UNKNOWN;
--
1.9.3
Jiri Olsa
2014-10-20 18:20:09 UTC
Permalink
Post by Andi Kleen
When -v is specified always print the hex address for the srcline.
I wasn't able to trigger this kind of output..

could you please update the changelog with the output change
and state the scenario when this is visible?
Post by Andi Kleen
---
tools/perf/util/srcline.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 36a7aff..a22be7c 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -258,6 +258,12 @@ char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
unsigned line = 0;
char *srcline;
const char *dso_name;
+ char astr[50];
+
+ if (verbose)
+ snprintf(astr, sizeof astr, " %#lx", addr);
WARNING: sizeof astr should be sizeof(astr)

thanks,
jirka

Andi Kleen
2014-09-26 23:37:11 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

For lbr-as-callgraph we need to see the line number in the history,
because many LBR entries can be in a single function, and just
showing the same function name many times is not useful.

When the history code is configured to sort by address, also try to
resolve the address to a file:srcline and display this in the browser.
If that doesn't work still display the address.

This can be also useful without LBRs for understanding which call in a large
function (or in which inlined function) called something else.

Contains fixes from Namhyung Kim

v2: Refactor code into common function
v3: Fix GTK build
v4: Rebase
Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/ui/browsers/hists.c | 17 -----------------
tools/perf/ui/gtk/hists.c | 11 +----------
tools/perf/ui/stdio/hist.c | 23 +++++++++--------------
tools/perf/util/callchain.c | 29 +++++++++++++++++++++++++++++
tools/perf/util/callchain.h | 4 ++++
tools/perf/util/machine.c | 2 +-
tools/perf/util/srcline.c | 6 ++++--
7 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index d4cef68..3144752 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -460,23 +460,6 @@ out:
return key;
}

-static char *callchain_list__sym_name(struct callchain_list *cl,
- char *bf, size_t bfsize, bool show_dso)
-{
- int printed;
-
- if (cl->ms.sym)
- printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
- else
- printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
-
- if (show_dso)
- scnprintf(bf + printed, bfsize - printed, " %s",
- cl->ms.map ? cl->ms.map->dso->short_name : "unknown");
-
- return bf;
-}
-
struct callchain_print_arg {
/* for hists browser */
off_t row_offset;
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index f3fa425..b19f96d 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -89,15 +89,6 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_acc;
}

-static void callchain_list__sym_name(struct callchain_list *cl,
- char *bf, size_t bfsize)
-{
- if (cl->ms.sym)
- scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
- else
- scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
-}
-
static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
GtkTreeIter *parent, int col, u64 total)
{
@@ -128,7 +119,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
gtk_tree_store_set(store, &iter, 0, buf, -1);

- callchain_list__sym_name(chain, buf, sizeof(buf));
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
gtk_tree_store_set(store, &iter, col, buf, -1);

if (need_new_parent) {
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 15b451a..dfcbc90 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -41,6 +41,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
{
int i;
size_t ret = 0;
+ char bf[1024];

ret += callchain__fprintf_left_margin(fp, left_margin);
for (i = 0; i < depth; i++) {
@@ -56,11 +57,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
} else
ret += fprintf(fp, "%s", " ");
}
- if (chain->ms.sym)
- ret += fprintf(fp, "%s\n", chain->ms.sym->name);
- else
- ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip);
-
+ fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp);
+ fputc('\n', fp);
return ret;
}

@@ -168,6 +166,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
struct rb_node *node;
int i = 0;
int ret = 0;
+ char bf[1024];

/*
* If have one single callchain root, don't bother printing
@@ -196,10 +195,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
} else
ret += callchain__fprintf_left_margin(fp, left_margin);

- if (chain->ms.sym)
- ret += fprintf(fp, " %s\n", chain->ms.sym->name);
- else
- ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
+ ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf),
+ false));

if (++entries_printed == callchain_param.print_limit)
break;
@@ -219,6 +216,7 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node,
{
struct callchain_list *chain;
size_t ret = 0;
+ char bf[1024];

if (!node)
return 0;
@@ -229,11 +227,8 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node,
list_for_each_entry(chain, &node->val, list) {
if (chain->ip >= PERF_CONTEXT_MAX)
continue;
- if (chain->ms.sym)
- ret += fprintf(fp, " %s\n", chain->ms.sym->name);
- else
- ret += fprintf(fp, " %p\n",
- (void *)(long)chain->ip);
+ ret += fprintf(fp, " %s\n", callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
}

return ret;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 265457c..b7ee210 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -655,3 +655,32 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
out:
return 1;
}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso)
+{
+ int printed;
+
+ if (cl->ms.sym) {
+ if (callchain_param.key == CCKEY_ADDRESS &&
+ cl->ms.map && !cl->srcline)
+ cl->srcline = get_srcline(cl->ms.map->dso,
+ map__rip_2objdump(cl->ms.map,
+ cl->ip));
+ if (cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s",
+ cl->ms.sym->name, cl->srcline);
+ else
+ printed = scnprintf(bf, bfsize, "%s",
+ cl->ms.sym->name);
+ } else
+ printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+
+ if (show_dso)
+ scnprintf(bf + printed, bfsize - printed, " %s",
+ cl->ms.map ?
+ cl->ms.map->dso->short_name :
+ "unknown");
+
+ return bf;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7a1788a..7e27bf1 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -66,6 +66,7 @@ struct callchain_param {
struct callchain_list {
u64 ip;
struct map_symbol ms;
+ char *srcline;
struct list_head list;
};

@@ -190,4 +191,7 @@ static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused
}
#endif

+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8ba32ce..04bd3f4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1396,7 +1396,7 @@ static int add_callchain_ip(struct machine *machine,
return -EINVAL;
}

- return callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym);
+ return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
}

#define CHASHSZ 127
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index f3e4bc5..c6a7cdc 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -258,7 +258,7 @@ char *get_srcline(struct dso *dso, unsigned long addr)
const char *dso_name;

if (!dso->has_srcline)
- return SRCLINE_UNKNOWN;
+ goto out;

if (dso->symsrc_filename)
dso_name = dso->symsrc_filename;
@@ -289,7 +289,9 @@ out:
dso->has_srcline = 0;
dso__free_a2l(dso);
}
- return SRCLINE_UNKNOWN;
+ if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
+ return SRCLINE_UNKNOWN;
+ return srcline;
}

void free_srcline(char *srcline)
--
1.9.3
Jiri Olsa
2014-10-20 17:57:15 UTC
Permalink
SNIP
Post by Andi Kleen
- if (chain->ms.sym)
- ret += fprintf(fp, " %s\n", chain->ms.sym->name);
- else
- ret += fprintf(fp, " %p\n",
- (void *)(long)chain->ip);
+ ret += fprintf(fp, " %s\n", callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
}
return ret;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 265457c..b7ee210 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -655,3 +655,32 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
return 1;
}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso)
+{
+ int printed;
+
+ if (cl->ms.sym) {
+ if (callchain_param.key == CCKEY_ADDRESS &&
+ cl->ms.map && !cl->srcline)
+ cl->srcline = get_srcline(cl->ms.map->dso,
+ map__rip_2objdump(cl->ms.map,
+ cl->ip));
+ if (cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s",
+ cl->ms.sym->name, cl->srcline);
+ else
+ printed = scnprintf(bf, bfsize, "%s",
+ cl->ms.sym->name);
+ } else
+ printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+
+ if (show_dso)
+ scnprintf(bf + printed, bfsize - printed, " %s",
+ cl->ms.map ?
+ "unknown");
+
+ return bf;
could you please get the callchain_list__sym_name factoring code
separated of the new changes you introduce?

thanks,
jirka
Andi Kleen
2014-09-26 23:37:16 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

asprintf corrupts memory on some older glibc versions.
Provide a replacement. This fixes various segfaults
with --branch-history on older Fedoras.

Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/Makefile.perf | 1 +
tools/perf/builtin-report.c | 3 ++-
tools/perf/util/asprintf.c | 28 ++++++++++++++++++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 tools/perf/util/asprintf.c

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 171f4e6..5bdb960 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -377,6 +377,7 @@ LIB_OBJS += $(OUTPUT)util/vdso.o
LIB_OBJS += $(OUTPUT)util/stat.o
LIB_OBJS += $(OUTPUT)util/record.o
LIB_OBJS += $(OUTPUT)util/srcline.o
+LIB_OBJS += $(OUTPUT)util/asprintf.o
LIB_OBJS += $(OUTPUT)util/data.o
LIB_OBJS += $(OUTPUT)util/tsc.o
LIB_OBJS += $(OUTPUT)util/cloexec.o
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index cd87a40..96bc166 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -756,7 +756,8 @@ repeat:
callchain_param.branch_callstack = 1;
callchain_param.key = CCKEY_ADDRESS;
symbol_conf.use_callchain = true;
- callchain_register_param(&callchain_param);
+ if (callchain_register_param(&callchain_param) < 0)
+ pr_err("Cannot register callchain parameters");
if (sort_order == default_sort_order)
sort_order = "srcline,symbol,dso";
branch_mode = 0;
diff --git a/tools/perf/util/asprintf.c b/tools/perf/util/asprintf.c
new file mode 100644
index 0000000..9aafaca
--- /dev/null
+++ b/tools/perf/util/asprintf.c
@@ -0,0 +1,28 @@
+/* Replacement for asprintf as it's buggy in older glibc versions */
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+int vasprintf(char **str, const char *fmt, va_list ap)
+{
+ char buf[1024];
+ int len = vsnprintf(buf, sizeof buf, fmt, ap);
+
+ *str = malloc(len + 1);
+ if (!*str)
+ return -1;
+ strcpy(*str, buf);
+ return len;
+}
+
+int asprintf(char **str, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = vasprintf(str, fmt, ap);
+ va_end(ap);
+ return ret;
+}
--
1.9.3
Andi Kleen
2014-09-26 23:37:10 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Add a --branch-history option to perf report that changes all
the settings necessary for using the branches in callstacks.

This is just a short cut to make this nicer to use, it does
not enable any functionality by itself.

v2: Change sort order. Rename option to --branch-history to
be less confusing.
v3: Updates
v4: Fix conflict with newer perf base
Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/Documentation/perf-report.txt | 5 +++++
tools/perf/builtin-report.c | 35 +++++++++++++++++++++++++++-----
2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 22706be..dd7cccd 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -271,6 +271,11 @@ OPTIONS
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.

+--branch-history::
+ Add the addresses of sampled taken branches to the callstack.
+ This allows to examine the path the program took to each sample.
+ The data collection must have used -b (or -j) and -g.
+
--objdump=<path>::
Path to objdump binary.

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 6b53d02..cd87a40 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -226,8 +226,9 @@ static int report__setup_sample_type(struct report *rep)
return -EINVAL;
}
if (symbol_conf.use_callchain) {
- ui__error("Selected -g but no callchain data. Did "
- "you call 'perf record' without -g?\n");
+ ui__error("Selected -g or --branch-history but no "
+ "callchain data. Did\n"
+ "you call 'perf record' without -g?\n");
return -1;
}
} else if (!rep->dont_use_callchains &&
@@ -550,6 +551,16 @@ parse_branch_mode(const struct option *opt __maybe_unused,
}

static int
+parse_branch_call_mode(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused, int unset)
+{
+ int *branch_mode = opt->value;
+
+ *branch_mode = !unset;
+ return 0;
+}
+
+static int
parse_percent_limit(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -564,7 +575,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_session *session;
struct stat st;
bool has_br_stack = false;
- int branch_mode = -1;
+ int branch_mode = -1, branch_call_mode = -1;
int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
@@ -675,7 +686,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
"Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
- "use branch records for histogram filling", parse_branch_mode),
+ "use branch records for per branch histogram filling",
+ parse_branch_mode),
+ OPT_CALLBACK_NOOPT(0, "branch-history", &branch_call_mode, "",
+ "add last branch records to call history",
+ parse_branch_call_mode),
OPT_STRING(0, "objdump", &objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
@@ -732,10 +747,20 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);

- if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) {
+ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
+ branch_call_mode == -1) {
sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}
+ if (branch_call_mode != -1) {
+ callchain_param.branch_callstack = 1;
+ callchain_param.key = CCKEY_ADDRESS;
+ symbol_conf.use_callchain = true;
+ callchain_register_param(&callchain_param);
+ if (sort_order == default_sort_order)
+ sort_order = "srcline,symbol,dso";
+ branch_mode = 0;
+ }

if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
--
1.9.3
Jiri Olsa
2014-10-20 07:54:54 UTC
Permalink
On Fri, Sep 26, 2014 at 04:37:10PM -0700, Andi Kleen wrote:

SNIP
Post by Andi Kleen
+ const char *str __maybe_unused, int unset)
+{
+ int *branch_mode = opt->value;
+
+ *branch_mode = !unset;
+ return 0;
+}
+
+static int
parse_percent_limit(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -564,7 +575,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_session *session;
struct stat st;
bool has_br_stack = false;
- int branch_mode = -1;
+ int branch_mode = -1, branch_call_mode = -1;
this one no longer applies as well

jirka
Jiri Olsa
2014-10-20 17:51:46 UTC
Permalink
On Fri, Sep 26, 2014 at 04:37:10PM -0700, Andi Kleen wrote:

SNIP
Post by Andi Kleen
@@ -564,7 +575,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
struct perf_session *session;
struct stat st;
bool has_br_stack = false;
- int branch_mode = -1;
+ int branch_mode = -1, branch_call_mode = -1;
int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
@@ -675,7 +686,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
"Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
- "use branch records for histogram filling", parse_branch_mode),
+ "use branch records for per branch histogram filling",
+ parse_branch_mode),
+ OPT_CALLBACK_NOOPT(0, "branch-history", &branch_call_mode, "",
+ "add last branch records to call history",
+ parse_branch_call_mode),
I think we've been through this before, but why's not
branch_mode and branch_call_mode bools? sry if we already
sorted this out, but I couldnt find it
Post by Andi Kleen
OPT_STRING(0, "objdump", &objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) {
+ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
+ branch_call_mode == -1) {
sort__mode = SORT_MODE__BRANCH;
symbol_conf.cumulate_callchain = false;
}
+ if (branch_call_mode != -1) {
+ callchain_param.branch_callstack = 1;
+ callchain_param.key = CCKEY_ADDRESS;
+ symbol_conf.use_callchain = true;
+ callchain_register_param(&callchain_param);
+ if (sort_order == default_sort_order)
+ sort_order = "srcline,symbol,dso";
+ branch_mode = 0;
+ }
also why set branch_mode if there's no further usage of it?

thanks,
jirka
Andi Kleen
2014-09-26 23:37:09 UTC
Permalink
From: Andi Kleen <***@linux.intel.com>

Currently branch stacks can be only shown as edge histograms for
individual branches. I never found this display particularly useful.

This implements an alternative mode that creates histograms over complete
branch traces, instead of individual branches, similar to how normal
callgraphs are handled. This is done by putting it in
front of the normal callgraph and then using the normal callgraph
histogram infrastructure to unify them.

This way in complex functions we can understand the control flow
that lead to a particular sample, and may even see some control
flow in the caller for short functions.

Example (simplified, of course for such simple code this
is usually not needed):

tcall.c:

volatile a = 10000, b = 100000, c;

__attribute__((noinline)) f2()
{
c = a / b;
}

__attribute__((noinline)) f1()
{
f2();
f2();
}
main()
{
int i;
for (i = 0; i < 1000000; i++)
f1();
}

% perf record -b -g ./tsrc/tcall
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.044 MB perf.data (~1923 samples) ]
% perf report --branch-history
...
54.91% tcall.c:6 [.] f2 tcall
|
|--65.53%-- f2 tcall.c:5
| |
| |--70.83%-- f1 tcall.c:11
| | f1 tcall.c:10
| | main tcall.c:18
| | main tcall.c:18
| | main tcall.c:17
| | main tcall.c:17
| | f1 tcall.c:13
| | f1 tcall.c:13
| | f2 tcall.c:7
| | f2 tcall.c:5
| | f1 tcall.c:12
| | f1 tcall.c:12
| | f2 tcall.c:7
| | f2 tcall.c:5
| | f1 tcall.c:11
| |
| --29.17%-- f1 tcall.c:12
| f1 tcall.c:12
| f2 tcall.c:7
| f2 tcall.c:5
| f1 tcall.c:11
| f1 tcall.c:10
| main tcall.c:18
| main tcall.c:18
| main tcall.c:17
| main tcall.c:17
| f1 tcall.c:13
| f1 tcall.c:13
| f2 tcall.c:7
| f2 tcall.c:5
| f1 tcall.c:12

The default output is unchanged.

This is only implemented in perf report, no change to record
or anywhere else.

This adds the basic code to report:
- add a new "branch" option to the -g option parser to enable this mode
- when the flag is set include the LBR into the callstack in machine.c.
The rest of the history code is unchanged and doesn't know the difference
between LBR entry and normal call entry.
- detect overlaps with the callchain
- remove small loop duplicates in the LBR

Current limitations:
- The LBR flags (mispredict etc.) are not shown in the history
and LBR entries have no special marker.
- It would be nice if annotate marked the LBR entries somehow
(e.g. with arrows)

v2: Various fixes.
v3: Merge further patches into this one. Fix white space.
v4: Improve manpage. Address review feedback.
v5: Rename functions. Better error message without -g. Fix crash without
-b.
v6: Rebase
v7: Rebase. Use NO_ENTRY in memset.
Signed-off-by: Andi Kleen <***@linux.intel.com>
---
tools/perf/Documentation/perf-report.txt | 7 +-
tools/perf/builtin-report.c | 4 +-
tools/perf/util/callchain.c | 2 +
tools/perf/util/callchain.h | 1 +
tools/perf/util/machine.c | 162 +++++++++++++++++++++++++++----
tools/perf/util/symbol.h | 3 +-
6 files changed, 154 insertions(+), 25 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 0927bf4..22706be 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -159,7 +159,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.

--g [type,min[,limit],order[,key]]::
+-g [type,min[,limit],order[,key][,branch]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
@@ -177,6 +177,11 @@ OPTIONS
- function: compare on functions
- address: compare on individual code addresses

+ branch can be:
+ - branch: include last branch information in callgraph
+ when available. Usually more convenient to use --branch-history
+ for this.
+
Default: fractal,0.5,callee,function.

--children::
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8c0b3f2..6b53d02 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -628,8 +628,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
- OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
- "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
+ "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 08f0fbf..265457c 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -61,6 +61,8 @@ parse_callchain_report_opt(const char *arg)
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok, "address", strlen(tok)))
callchain_param.key = CCKEY_ADDRESS;
+ else if (!strncmp(tok, "branch", strlen(tok)))
+ callchain_param.branch_callstack = 1;
/* try to get the min percent */
else if (!minpcnt_set) {
callchain_param.min_percent = strtod(tok, &endptr);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index da43619..7a1788a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -60,6 +60,7 @@ struct callchain_param {
sort_chain_func_t sort;
enum chain_order order;
enum chain_key key;
+ bool branch_callstack;
};

struct callchain_list {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ec38b..8ba32ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
#include <stdbool.h>
#include <symbol/kallsyms.h>
#include "unwind.h"
+#include "linux/hash.h"

int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
@@ -1364,9 +1365,84 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi;
}

+static int add_callchain_ip(struct machine *machine,
+ struct thread *thread,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int cpumode,
+ u64 ip)
+{
+ struct addr_location al;
+
+ al.filtered = 0;
+ al.sym = NULL;
+ if (cpumode == -1)
+ thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al);
+ else
+ thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION,
+ ip, &al);
+ if (al.sym != NULL) {
+ if (sort__has_parent && !*parent &&
+ symbol__match_regex(al.sym, &parent_regex))
+ *parent = al.sym;
+ else if (have_ignore_callees && root_al &&
+ symbol__match_regex(al.sym, &ignore_callees_regex)) {
+ /* Treat this symbol as the root,
+ forgetting its callees. */
+ *root_al = al;
+ callchain_cursor_reset(&callchain_cursor);
+ }
+ if (!symbol_conf.use_callchain)
+ return -EINVAL;
+ }
+
+ return callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym);
+}
+
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+ int i, j, off;
+ unsigned char chash[CHASHSZ];
+ memset(chash, NO_ENTRY, sizeof(chash));
+
+ BUG_ON(nr >= 256);
+ for (i = 0; i < nr; i++) {
+ int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+ /* no collision handling for now */
+ if (chash[h] == NO_ENTRY) {
+ chash[h] = i;
+ } else if (l[chash[h]].from == l[i].from) {
+ bool is_loop = true;
+ /* check if it is a real loop */
+ off = 0;
+ for (j = chash[h]; j < i && i + off < nr; j++, off++)
+ if (l[j].from != l[i + off].from) {
+ is_loop = false;
+ break;
+ }
+ if (is_loop) {
+ memmove(l + i, l + i + off,
+ (nr - (i + off))
+ * sizeof(struct branch_entry));
+ nr -= off;
+ }
+ }
+ }
+ return nr;
+}
+
static int machine__resolve_callchain_sample(struct machine *machine,
struct thread *thread,
struct ip_callchain *chain,
+ struct branch_stack *branch,
struct symbol **parent,
struct addr_location *root_al,
int max_stack)
@@ -1377,9 +1453,66 @@ static int machine__resolve_callchain_sample(struct machine *machine,
int j;
int err;
int skip_idx __maybe_unused;
+ int first_call = 0;

callchain_cursor_reset(&callchain_cursor);

+ /*
+ * Add branches to call stack for easier browsing. This gives
+ * more context for a sample than just the callers.
+ *
+ * This uses individual histograms of paths compared to the
+ * aggregated histograms the normal LBR mode uses.
+ *
+ * Limitations for now:
+ * - No extra filters
+ * - No annotations (should annotate somehow)
+ */
+
+ if (branch && callchain_param.branch_callstack) {
+ int nr = min(max_stack, (int)branch->nr);
+ struct branch_entry be[nr];
+
+ if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+ pr_warning("corrupted branch chain. skipping...\n");
+ return 0;
+ }
+
+ for (i = 0; i < nr; i++) {
+ if (callchain_param.order == ORDER_CALLEE) {
+ be[i] = branch->entries[i];
+ /*
+ * Check for overlap into the callchain.
+ * The return address is one off compared to
+ * the branch entry. To adjust for this
+ * assume the calling instruction is not longer
+ * than 8 bytes.
+ */
+ if (be[i].from < chain->ips[first_call] &&
+ be[i].from >= chain->ips[first_call] - 8)
+ first_call++;
+ } else
+ be[i] = branch->entries[branch->nr - i - 1];
+ }
+
+ nr = remove_loops(be, nr);
+
+ for (i = 0; i < nr; i++) {
+ err = add_callchain_ip(machine, thread, parent,
+ root_al,
+ -1, be[i].to);
+ if (!err)
+ err = add_callchain_ip(machine, thread,
+ parent, root_al,
+ -1, be[i].from);
+ if (err == -EINVAL)
+ break;
+ if (err)
+ return err;
+ }
+ chain_nr -= nr;
+ }
+
if (chain->nr > PERF_MAX_STACK_DEPTH) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
@@ -1391,9 +1524,8 @@ static int machine__resolve_callchain_sample(struct machine *machine,
*/
skip_idx = arch_skip_callchain_idx(machine, thread, chain);

- for (i = 0; i < chain_nr; i++) {
+ for (i = first_call; i < chain_nr; i++) {
u64 ip;
- struct addr_location al;

if (callchain_param.order == ORDER_CALLEE)
j = i;
@@ -1430,24 +1562,10 @@ static int machine__resolve_callchain_sample(struct machine *machine,
continue;
}

- al.filtered = 0;
- thread__find_addr_location(thread, machine, cpumode,
- MAP__FUNCTION, ip, &al);
- if (al.sym != NULL) {
- if (sort__has_parent && !*parent &&
- symbol__match_regex(al.sym, &parent_regex))
- *parent = al.sym;
- else if (have_ignore_callees && root_al &&
- symbol__match_regex(al.sym, &ignore_callees_regex)) {
- /* Treat this symbol as the root,
- forgetting its callees. */
- *root_al = al;
- callchain_cursor_reset(&callchain_cursor);
- }
- }
-
- err = callchain_cursor_append(&callchain_cursor,
- ip, al.map, al.sym);
+ err = add_callchain_ip(machine, thread, parent, root_al,
+ cpumode, ip);
+ if (err == -EINVAL)
+ break;
if (err)
return err;
}
@@ -1473,7 +1591,9 @@ int machine__resolve_callchain(struct machine *machine,
int ret;

ret = machine__resolve_callchain_sample(machine, thread,
- sample->callchain, parent,
+ sample->callchain,
+ sample->branch_stack,
+ parent,
root_al, max_stack);
if (ret)
return ret;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index bec4b7b..bae04e2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -122,7 +122,8 @@ struct symbol_conf {
demangle,
demangle_kernel,
filter_relative,
- show_hist_headers;
+ show_hist_headers,
+ branch_callstack;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
1.9.3
Jiri Olsa
2014-10-20 07:54:45 UTC
Permalink
On Fri, Sep 26, 2014 at 04:37:09PM -0700, Andi Kleen wrote:

SNIP
Post by Andi Kleen
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
- OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
- "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
+ "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 08f0fbf..265457c 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -61,6 +61,8 @@ parse_callchain_report_opt(const char *arg)
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok, "address", strlen(tok)))
callchain_param.key = CCKEY_ADDRESS;
+ else if (!strncmp(tok, "branch", strlen(tok)))
+ callchain_param.branch_callstack = 1;
this needs to be rebased to latest Namhyung's changes
which got in..

could you please rebase on Arnaldo's perf/core?

jirka
Jiri Olsa
2014-10-20 10:10:45 UTC
Permalink
SNIP
Post by Andi Kleen
struct callchain_list {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ec38b..8ba32ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
#include <stdbool.h>
#include <symbol/kallsyms.h>
#include "unwind.h"
+#include "linux/hash.h"
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
@@ -1364,9 +1365,84 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi;
}
+static int add_callchain_ip(struct machine *machine,
+ struct thread *thread,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int cpumode,
+ u64 ip)
+{
+ struct addr_location al;
+
+ al.filtered = 0;
+ al.sym = NULL;
+ if (cpumode == -1)
+ thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al);
+ else
+ thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION,
+ ip, &al);
this cpumode condition is new (wrt below comment)
Post by Andi Kleen
+ if (al.sym != NULL) {
+ if (sort__has_parent && !*parent &&
+ symbol__match_regex(al.sym, &parent_regex))
+ *parent = al.sym;
+ else if (have_ignore_callees && root_al &&
+ symbol__match_regex(al.sym, &ignore_callees_regex)) {
+ /* Treat this symbol as the root,
+ forgetting its callees. */
+ *root_al = al;
+ callchain_cursor_reset(&callchain_cursor);
+ }
+ if (!symbol_conf.use_callchain)
+ return -EINVAL;
why is this condition here?

could you please split this change into
- adding add_callchain_ip function
- adding more functionality to add_callchain_ip function?

IMO it'd make it cleaner and easier to understand

thanks,
jirka
Loading...