From 97db62062ac76e314c8bda4dc5b63f0ea906d15f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 31 Aug 2015 21:39:44 +0300 Subject: [PATCH 1/9] perf tools: Fix build on powerpc broken by pt/bts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is theoretically possible to process perf.data files created on x86 and that contain Intel PT or Intel BTS data, on any other architecture, which is why it is possible for there to be build errors on powerpc caused by pt/bts. The errors were: util/intel-pt-decoder/intel-pt-insn-decoder.c: In function ‘intel_pt_insn_decoder’: util/intel-pt-decoder/intel-pt-insn-decoder.c:138:3: error: switch missing default case [-Werror=switch-default] switch (insn->immediate.nbytes) { ^ cc1: all warnings being treated as errors linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_synth_branch_sample': sources/linux-acme.git/tools/perf/util/intel-pt.c:871: undefined reference to `tsc_to_perf_time' linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_sample': sources/linux-acme.git/tools/perf/util/intel-pt.c:915: undefined reference to `tsc_to_perf_time' sources/linux-acme.git/tools/perf/util/intel-pt.c:962: undefined reference to `tsc_to_perf_time' linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_process_event': sources/linux-acme.git/tools/perf/util/intel-pt.c:1454: undefined reference to `perf_time_to_tsc' Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Sukadev Bhattiprolu Cc: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1441046384-28663-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e912856cc4e5..e79e4522368a 100644 --- a/tools/perf/util/Build +++ 
b/tools/perf/util/Build @@ -75,6 +75,7 @@ libperf-y += record.o libperf-y += srcline.o libperf-y += data.o libperf-$(CONFIG_X86) += tsc.o +libperf-$(CONFIG_AUXTRACE) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 9e4eb8fcd559..d23138c06665 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -146,6 +146,9 @@ static void intel_pt_insn_decoder(struct insn *insn, case 4: intel_pt_insn->rel = bswap_32(insn->immediate.value); break; + default: + intel_pt_insn->rel = 0; + break; } #else intel_pt_insn->rel = insn->immediate.value; -- 2.4.3 From acf860ae7c53cc8b0c5d372c218332aac3eeba4f Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Thu, 27 Aug 2015 02:30:55 +0000 Subject: [PATCH 2/9] bpf tools: New API to get name from a BPF object Before this patch there's no way to connect a loaded bpf object to its source file. However, during applying perf's '--filter' to BPF object, without this connection makes things harder, because perf loads all programs together, but '--filter' setting is for each object. The API of bpf_object__open_buffer() is changed to allow passing a name. Fortunately, at this time there's only one user of it (perf test LLVM), so we change it together. 
Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Kaixu Xia Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440742821-44548-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 4 +++- tools/perf/tests/llvm.c | 2 +- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4fa4bc4505f5..4252fc22f78f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz) + size_t obj_buf_sz, + const char *name) { + char tmp_name[64]; + /* param validation */ if (!obj_buf || obj_buf_sz <= 0) return NULL; - pr_debug("loading object from buffer\n"); + if (!name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + tmp_name[sizeof(tmp_name) - 1] = '\0'; + name = tmp_name; + } + pr_debug("loading object '%s' from buffer\n", + name); - return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz); + return __bpf_object__open(name, obj_buf, obj_buf_sz); } int bpf_object__unload(struct bpf_object *obj) @@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev) return next; } +const char * +bpf_object__get_name(struct bpf_object *obj) +{ + if (!obj) + return NULL; + return obj->path; +} + struct bpf_program * bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index ea8adc206b62..f16170c95ffd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -28,12 +28,14 @@ struct bpf_object; struct bpf_object *bpf_object__open(const char *path); struct bpf_object 
*bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz); + size_t obj_buf_sz, + const char *name); void bpf_object__close(struct bpf_object *object); /* Load/unload object into/from kernel */ int bpf_object__load(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj); +const char *bpf_object__get_name(struct bpf_object *obj); struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index a337356fd979..52d55971f66f 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz); + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); if (!obj) return -1; bpf_object__close(obj); -- 2.4.3 From d988d5ee647861706bc7a391ddbc29429b50f00e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 21 Aug 2015 02:23:14 -0400 Subject: [PATCH 3/9] perf evlist: Open event on evsel cpus and threads An evsel may have different cpus and threads than the evlist it is in. Use its own cpus and threads when opening the evsel in 'perf record'. 
Signed-off-by: Kan Liang Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1440138194-17001-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/evlist.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a660022f2c92..1d14f382f614 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -279,7 +279,7 @@ static int record__open(struct record *rec) evlist__for_each(evlist, pos) { try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { + if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose) ui__warning("%s\n", msg); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8d00039d6a20..d51a5200c8af 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1181,6 +1181,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e if (evsel->filter == NULL) continue; + /* + * filters only work for tracepoint event, which doesn't have cpu limit. + * So evlist and evsel should always be same. + */ err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); if (err) { *err_evsel = evsel; -- 2.4.3 From fc36f9485aee3a62b22be1f561543a31bce6d48e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 31 Aug 2015 18:41:10 +0200 Subject: [PATCH 4/9] perf script: Enable printing of interrupted machine state This patch adds the output of the interrupted machine state (iregs) to perf script. It presents them as NAME:VALUE so this is easy to parse during post processing. To capture the interrupted machine state: $ perf record -I .... 
to display iregs, use the -F option: $ perf script -F ip,iregs 40afc2 AX:0x6c5770 BX:0x1e CX:0x5f4d80a DX:0x101010101010101 SI:0x1 Signed-off-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441039273-16260-2-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 614b2c7b0293..dc3ec783b7bd 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, flags. + srcline, period, iregs, flags. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. 
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4430340292c0..eb51325e8ad9 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -6,6 +6,7 @@ #include "util/exec_cmd.h" #include "util/header.h" #include "util/parse-options.h" +#include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -46,6 +47,7 @@ enum perf_output_field { PERF_OUTPUT_SYMOFFSET = 1U << 11, PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_PERIOD = 1U << 13, + PERF_OUTPUT_IREGS = 1U << 14, }; struct output_option { @@ -66,6 +68,7 @@ struct output_option { {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, + {.str = "iregs", .field = PERF_OUTPUT_IREGS}, }; /* default set to maintain compatibility with current format */ @@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_PERIOD)) return -EINVAL; + if (PRINT_FIELD(IREGS) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", + PERF_OUTPUT_IREGS)) + return -EINVAL; + return 0; } @@ -352,6 +360,24 @@ out: return 0; } +static void print_sample_iregs(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr) +{ + struct regs_dump *regs = &sample->intr_regs; + uint64_t mask = attr->sample_regs_intr; + unsigned i = 0, r; + + if (!regs) + return; + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + } +} + static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) @@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample, PERF_MAX_STACK_DEPTH); } + if (PRINT_FIELD(IREGS)) + 
print_sample_iregs(event, sample, thread, attr); + printf("\n"); } @@ -1643,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,flags", parse_output_fields), + "addr,symoff,period,iregs,flags", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", -- 2.4.3 From c5e991ee9dff0f8136168ed2d0d1a8cc3620dac4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 31 Aug 2015 18:41:11 +0200 Subject: [PATCH 5/9] perf/x86: Add list of register names This patch adds a way to locate a register identifier (PERF_REG_X86_*) based on its name, e.g., AX. This will be used by a subsequent patch to improve the flexibility of perf record. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441039273-16260-3-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/Build | 1 + tools/perf/arch/x86/util/perf_regs.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/perf_regs.h | 7 +++++++ 3 files changed, 38 insertions(+) create mode 100644 tools/perf/arch/x86/util/perf_regs.c diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 2c55e1b336c5..ff63649fa9ac 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += tsc.o libperf-y += pmu.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000000..087c84ef5234 --- /dev/null +++ 
b/tools/perf/arch/x86/util/perf_regs.c @@ -0,0 +1,30 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +#define REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define REG_END { .name = NULL } +const struct sample_reg sample_reg_masks[] = { + REG(AX, PERF_REG_X86_AX), + REG(BX, PERF_REG_X86_BX), + REG(CX, PERF_REG_X86_CX), + REG(DX, PERF_REG_X86_DX), + REG(SI, PERF_REG_X86_SI), + REG(DI, PERF_REG_X86_DI), + REG(BP, PERF_REG_X86_BP), + REG(SP, PERF_REG_X86_SP), + REG(IP, PERF_REG_X86_IP), + REG(FLAGS, PERF_REG_X86_FLAGS), + REG(CS, PERF_REG_X86_CS), + REG(SS, PERF_REG_X86_SS), +#ifdef HAVE_ARCH_X86_64_SUPPORT + REG(R8, PERF_REG_X86_R8), + REG(R9, PERF_REG_X86_R9), + REG(R10, PERF_REG_X86_R10), + REG(R11, PERF_REG_X86_R11), + REG(R12, PERF_REG_X86_R12), + REG(R13, PERF_REG_X86_R13), + REG(R14, PERF_REG_X86_R14), + REG(R15, PERF_REG_X86_R15), +#endif + REG_END +}; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 980dbf76bc98..92c1fff2153e 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -5,6 +5,13 @@ struct regs_dump; +struct sample_reg { + const char *name; + uint64_t mask; +}; + +extern const struct sample_reg sample_reg_masks[]; + #ifdef HAVE_PERF_REGS_SUPPORT #include -- 2.4.3 From bcc84ec65ad1bd9f777a1fade6f8e5e0c5808fa5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 31 Aug 2015 18:41:12 +0200 Subject: [PATCH 6/9] perf record: Add ability to name registers to record This patch modifies the -I/--intr-regs option to enable passing the names of the registers to sample on interrupt. Registers can be specified by their symbolic names. For instance on x86, --intr-regs=ax,si. The motivation is to reduce the size of the perf.data file and the overhead of sampling by only collecting the registers useful to a specific analysis. For instance, for value profiling, sampling only the registers used to pass arguments to functions. 
With no parameter, the --intr-regs still records all possible registers based on the architecture. To name registers, it is necessary to use the long form of the option, i.e., --intr-regs: $ perf record --intr-regs=si,di,r8,r9 ..... To record any possible registers: $ perf record -I ..... $ perf report --intr-regs ... To display the register, one can use perf report -D To list the available registers: $ perf record --intr-regs=\? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 Signed-off-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441039273-16260-4-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 6 ++- tools/perf/builtin-record.c | 7 +++- tools/perf/perf.h | 2 +- tools/perf/util/Build | 1 + tools/perf/util/evsel.c | 2 +- tools/perf/util/parse-regs-options.c | 71 ++++++++++++++++++++++++++++++++ tools/perf/util/parse-regs-options.h | 5 +++ 7 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/parse-regs-options.c create mode 100644 tools/perf/util/parse-regs-options.h diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 347a27322ed8..2e9ce77b5e14 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different. --intr-regs:: Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option -is off by default. +is off by default. It is possible to select the registers to sample using their +symbolic names, e.g. on x86, ax, si. To list the available registers use +--intr-regs=\?. 
To name registers, pass a comma separated list such as +--intr-regs=ax,bx. The list of register is architecture dependent. + --running-time:: Record running and enabled time for read events (:S) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1d14f382f614..142eeb341b29 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -27,8 +27,10 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/data.h" +#include "util/perf_regs.h" #include "util/auxtrace.h" #include "util/parse-branch-options.h" +#include "util/parse-regs-options.h" #include #include @@ -1080,8 +1082,9 @@ struct option __record_options[] = { "sample transaction flags (special events only)"), OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, "use per-thread mmaps"), - OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, - "Sample machine registers on interrupt"), + OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", + "sample selected machine registers on interrupt," + " use -I ? 
to list register names", parse_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cccb4cf575d3..90129accffbe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -54,7 +54,6 @@ struct record_opts { bool sample_time_set; bool callgraph_set; bool period; - bool sample_intr_regs; bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; @@ -64,6 +63,7 @@ struct record_opts { unsigned int auxtrace_mmap_pages; unsigned int user_freq; u64 branch_stack; + u64 sample_intr_regs; u64 default_interval; u64 user_interval; size_t auxtrace_snapshot_size; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e79e4522368a..349bc96ca1fe 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -83,6 +83,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += parse-regs-options.o libperf-$(CONFIG_LIBELF) += symbol-elf.o libperf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bac25f41a751..c53f79123b37 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -787,7 +787,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) perf_evsel__config_callgraph(evsel, opts, &callchain_param); if (opts->sample_intr_regs) { - attr->sample_regs_intr = PERF_REGS_MASK; + attr->sample_regs_intr = opts->sample_intr_regs; perf_evsel__set_sample_bit(evsel, REGS_INTR); } diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c new file mode 100644 index 000000000000..4f2c1c255d81 --- /dev/null +++ b/tools/perf/util/parse-regs-options.c @@ -0,0 +1,71 @@ +#include "perf.h" +#include "util/util.h" +#include "util/debug.h" +#include "util/parse-options.h" 
+#include "util/parse-regs-options.h" + +int +parse_regs(const struct option *opt, const char *str, int unset) +{ + uint64_t *mode = (uint64_t *)opt->value; + const struct sample_reg *r; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -I */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + if (!strcmp(s, "?")) { + fprintf(stderr, "available registers: "); + for (r = sample_reg_masks; r->name; r++) { + fprintf(stderr, "%s ", r->name); + } + fputc('\n', stderr); + /* just printing available regs */ + return -1; + } + for (r = sample_reg_masks; r->name; r++) { + if (!strcasecmp(s, r->name)) + break; + } + if (!r->name) { + ui__warning("unknown register %s," + " check man page\n", s); + goto error; + } + + *mode |= r->mask; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to all possible regs */ + if (*mode == 0) + *mode = PERF_REGS_MASK; +error: + free(os); + return ret; +} diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h new file mode 100644 index 000000000000..7d762b188007 --- /dev/null +++ b/tools/perf/util/parse-regs-options.h @@ -0,0 +1,5 @@ +#ifndef _PERF_PARSE_REGS_OPTIONS_H +#define _PERF_PARSE_REGS_OPTIONS_H 1 +struct option; +int parse_regs(const struct option *opt, const char *str, int unset); +#endif /* _PERF_PARSE_REGS_OPTIONS_H */ -- 2.4.3 From 3b27d13940c3710a1128527c43719cb0bb05d73b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 1 Sep 2015 03:29:44 +0000 Subject: [PATCH 7/9] perf dwarf: Fix potential array out of bounds access There is a problem in the dwarf-regs.c files for sh, sparc and x86 where it is possible to make an out-of-bounds array access when searching for register names. 
This patch fixes it by replacing '<=' with '<', so that when the register number == XXX_MAX_REGS, get_arch_regstr() will return NULL. Signed-off-by: Wang Nan Reviewed-by: Matt Fleming Acked-by: Jiri Olsa Acked-by: Masami Hiramatsu Cc: David S. Miller Cc: Zefan Li Cc: pi3orama@huawei.com Link: http://lkml.kernel.org/r/1441078184-105038-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/sh/util/dwarf-regs.c | 2 +- tools/perf/arch/sparc/util/dwarf-regs.c | 2 +- tools/perf/arch/x86/util/dwarf-regs.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c index 0d0897f57a10..f8dfa89696f4 100644 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ b/tools/perf/arch/sh/util/dwarf-regs.c @@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = { /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; + return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; } diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c index 92eda412fed3..b704fdb9237a 100644 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ b/tools/perf/arch/sparc/util/dwarf-regs.c @@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = { */ const char *get_arch_regstr(unsigned int n) { - return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; + return (n < SPARC_MAX_REGS) ? 
sparc_regs_table[n] : NULL; } diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index be22dd463232..a08de0a35b83 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = { /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; + return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; } -- 2.4.3 From 04aa90b529ee45c5ee88997bc214202e07b26979 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 1 Sep 2015 05:56:45 +0000 Subject: [PATCH 8/9] perf build: Fix Intel PT instruction decoder dependency problem I hit the following build error randomly: ... /bin/sh: /path/to/kernel/buildperf/util/intel-pt-decoder/inat-tables.c: No such file or directory ... LINK /path/to/kernel/buildperf/plugin_mac80211.so LINK /path/to/kernel/buildperf/plugin_kmem.so LINK /path/to/kernel/buildperf/plugin_xen.so LINK /path/to/kernel/buildperf/plugin_hrtimer.so In file included from util/intel-pt-decoder/intel-pt-insn-decoder.c:25:0: util/intel-pt-decoder/inat.c:24:25: fatal error: inat-tables.c: No such file or directory #include "inat-tables.c" ^ compilation terminated. make[4]: *** [/path/to/kernel/buildperf/util/intel-pt-decoder/intel-pt-insn-decoder.o] Error 1 make[4]: *** Waiting for unfinished jobs.... LINK /path/to/kernel/buildperf/plugin_function.so This is caused by tools/perf/util/intel-pt-decoder/Build, which tries to generate $(OUTPUT)util/intel-pt-decoder/inat-tables.c automatically but forgets to ensure the existence of the $(OUTPUT)util/intel-pt-decoder directory. This patch fixes it by adding $(call rule_mkdir) like other similar rules. 
Signed-off-by: Wang Nan Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1441087005-107540-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/Build | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 240730d682c1..2386322ece4f 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -4,6 +4,7 @@ inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c -- 2.4.3 From af4aeadd8c04303c0aa2d112145c3627e2ebd026 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 1 Sep 2015 11:30:14 +0200 Subject: [PATCH 9/9] perf tools: Fix link time error with sample_reg_masks on non x86 This patch makes perf compile on non x86 platforms by defining a weak symbol for sample_reg_masks[] in util/perf_regs.c. The patch also moves the REG() and REG_END macros into the util/perf_regs.h header file. The macros are renamed to SMPL_REG/SMPL_REG_END to avoid clashes with other header files. 
Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441099814-26783-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 44 +++++++++++++++++------------------- tools/perf/util/perf_regs.c | 4 ++++ tools/perf/util/perf_regs.h | 2 ++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 087c84ef5234..c5db14f36cc7 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,30 +1,28 @@ #include "../../perf.h" #include "../../util/perf_regs.h" -#define REG(n, b) { .name = #n, .mask = 1ULL << (b) } -#define REG_END { .name = NULL } const struct sample_reg sample_reg_masks[] = { - REG(AX, PERF_REG_X86_AX), - REG(BX, PERF_REG_X86_BX), - REG(CX, PERF_REG_X86_CX), - REG(DX, PERF_REG_X86_DX), - REG(SI, PERF_REG_X86_SI), - REG(DI, PERF_REG_X86_DI), - REG(BP, PERF_REG_X86_BP), - REG(SP, PERF_REG_X86_SP), - REG(IP, PERF_REG_X86_IP), - REG(FLAGS, PERF_REG_X86_FLAGS), - REG(CS, PERF_REG_X86_CS), - REG(SS, PERF_REG_X86_SS), + SMPL_REG(AX, PERF_REG_X86_AX), + SMPL_REG(BX, PERF_REG_X86_BX), + SMPL_REG(CX, PERF_REG_X86_CX), + SMPL_REG(DX, PERF_REG_X86_DX), + SMPL_REG(SI, PERF_REG_X86_SI), + SMPL_REG(DI, PERF_REG_X86_DI), + SMPL_REG(BP, PERF_REG_X86_BP), + SMPL_REG(SP, PERF_REG_X86_SP), + SMPL_REG(IP, PERF_REG_X86_IP), + SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), + SMPL_REG(CS, PERF_REG_X86_CS), + SMPL_REG(SS, PERF_REG_X86_SS), #ifdef HAVE_ARCH_X86_64_SUPPORT - REG(R8, PERF_REG_X86_R8), - REG(R9, PERF_REG_X86_R9), - REG(R10, PERF_REG_X86_R10), - REG(R11, PERF_REG_X86_R11), - REG(R12, PERF_REG_X86_R12), - REG(R13, PERF_REG_X86_R13), - REG(R14, PERF_REG_X86_R14), - REG(R15, PERF_REG_X86_R15), + SMPL_REG(R8, PERF_REG_X86_R8), + SMPL_REG(R9, PERF_REG_X86_R9), + 
SMPL_REG(R10, PERF_REG_X86_R10), + SMPL_REG(R11, PERF_REG_X86_R11), + SMPL_REG(R12, PERF_REG_X86_R12), + SMPL_REG(R13, PERF_REG_X86_R13), + SMPL_REG(R14, PERF_REG_X86_R14), + SMPL_REG(R15, PERF_REG_X86_R15), #endif - REG_END + SMPL_REG_END }; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 43168fb0d9a2..885e8ac83997 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,6 +2,10 @@ #include "perf_regs.h" #include "event.h" +const struct sample_reg __weak sample_reg_masks[] = { + SMPL_REG_END +}; + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 92c1fff2153e..2984dcc54d67 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -9,6 +9,8 @@ struct sample_reg { const char *name; uint64_t mask; }; +#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG_END { .name = NULL } extern const struct sample_reg sample_reg_masks[]; -- 2.4.3