Discussion:
[PATCH v4 01/16] tools lib bpf: Check return value of strdup when reading map names
(too old to reply)
Wang Nan
2015-12-08 02:27:09 UTC
Permalink
Commit 561bbccac72d08babafaa33fd7fa9100ec4c9fb6 ("tools lib bpf:
Extract and collect map names from BPF object file") forgets checking
return value of strdup(). This patch fixes it. It also checks names
pointer before strcmp() for safty.

Signed-off-by: Wang Nan <***@huawei.com>
Acked-by: Namhyung Kim <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/lib/bpf/libbpf.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a298614..16485ab 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -527,14 +527,14 @@ bpf_object__init_maps(struct bpf_object *obj, void *data,
return 0;
}

-static void
+static int
bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
{
int i;
Elf_Data *symbols = obj->efile.symbols;

if (!symbols || maps_shndx < 0)
- return;
+ return -EINVAL;

for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
GElf_Sym sym;
@@ -556,9 +556,14 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
continue;
}
obj->maps[map_idx].name = strdup(map_name);
+ if (!obj->maps[map_idx].name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
pr_debug("map %zu is \"%s\"\n", map_idx,
obj->maps[map_idx].name);
}
+ return 0;
}

static int bpf_object__elf_collect(struct bpf_object *obj)
@@ -663,7 +668,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
}

if (maps_shndx >= 0)
- bpf_object__init_maps_name(obj, maps_shndx);
+ err = bpf_object__init_maps_name(obj, maps_shndx);
out:
return err;
}
@@ -1372,7 +1377,7 @@ bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
struct bpf_map *pos;

bpf_map__for_each(pos, obj) {
- if (strcmp(pos->name, name) == 0)
+ if (pos->name && !strcmp(pos->name, name))
return pos;
}
return NULL;
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:27:28 UTC
Permalink
From: He Kuang <***@huawei.com>

Make perf-record command support --vmlinux option if BPF_PROLOGUE is on.

'perf record' needs vmlinux as the source of DWARF info to generate
prologue for BPF programs, so path of vmlinux should be specified.

Short name 'k' has been taken by 'clockid'. This patch skips the short
option name and use '--vmlinux' for vmlinux path.

Documentation is also updated.

Test result:

In a production (or broken) environment:
(built by
# rm -rf ~/.debug/
# mv /lib/modules/`uname -r`/build/vmlinux /tmp/
)

# ./perf record -e ./test_bpf_base.c ls
Failed to find the path for kernel: No such file or directory
event syntax error: './test_bpf_base.c'
\___ You need to check probing points in BPF file
...

# ./perf record --vmlinux /tmp/vmlinux -e ./test_bpf_base.c ls
...
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data ]

Help messages when build with NO_LIBBPF:

# ./perf record -h
--transaction sample transaction flags (special events only)
--vmlinux <file> vmlinux pathname
(not build because NO_LIBBPF=1)
# ./perf record --vmlinux /tmp/vmlinux ls /
Warning: option `vmlinux' is not built because NO_LIBBPF=1
...
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data (11 samples) ]

Help messages when build with NO_DWARF:

# ./perf record -h
--transaction sample transaction flags (special events only)
--vmlinux <file> vmlinux pathname
(not build because NO_DWARF=1)

Signed-off-by: He Kuang <***@huawei.com>
Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/Documentation/perf-record.txt | 10 ++++++++--
tools/perf/builtin-record.c | 16 ++++++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e630a7d..8d032f4 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -314,11 +314,17 @@ This option sets the time out limit. The default value is 500 ms.
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.

---clang-path::
+--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
+(enabled when BPF support is on)

---clang-opt::
+--clang-opt=OPTIONS::
Options passed to clang when compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--vmlinux=PATH::
+Specify vmlinux path which has debuginfo.
+(enabled when BPF prologue is on)

SEE ALSO
--------
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 11bf32d..2230b85 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1128,6 +1128,8 @@ struct option __record_options[] = {
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
"options passed to clang when compiling BPF scriptlets"),
+ OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
OPT_END()
};

@@ -1146,6 +1148,20 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
# undef set_nobuild
#endif

+#ifndef HAVE_BPF_PROLOGUE
+# if !defined (HAVE_DWARF_SUPPORT)
+# define REASON "NO_DWARF=1"
+# elif !defined (HAVE_LIBBPF_SUPPORT)
+# define REASON "NO_LIBBPF=1"
+# else
+# define REASON "this architecture doesn't support BPF prologue"
+# endif
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
+ set_nobuild('\0', "vmlinux", true);
+# undef set_nobuild
+# undef REASON
+#endif
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:27:42 UTC
Permalink
bpf_perf_event_output() outputs data through sample->raw_data. This
patch adds support to convert those data into CTF. A python script
then can be used to process output data from BPF programs.

Test result:

# cat ./test_bpf_output.c
/************************ BEGIN **************************/
typedef int u32;
typedef unsigned long long u64;

enum bpf_map_type {
BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,
};

struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
#define SEC(NAME) __attribute__((section(NAME), used))
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
(void *)23;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};
static inline int __attribute__((always_inline))
func(void *ctx, int type)
{
struct {
u64 ktime;
int type;
} __attribute__((packed)) output_data;
char error_data[] = "Error: failed to output\n";
int err;
output_data.type = type;
output_data.ktime = bpf_ktime_get_ns();
err = bpf_perf_event_output(ctx, &channel, bpf_get_smp_processor_id(),
&output_data, sizeof(output_data));
if (err)
bpf_trace_printk(error_data, sizeof(error_data));
return 0;
}
SEC("func_begin=sys_nanosleep")
int func_begin(void *ctx) {return func(ctx, 1);}
SEC("func_end=sys_nanosleep%return")
int func_end(void *ctx) { return func(ctx, 2);}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************ END ***************************/

# ./perf record -e evt=bpf-output/no-inherit/ \
-e ./test_bpf_output_2.c/maps:channel.event=evt/ \
usleep 100000
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ]

# ./perf script
usleep 14942 92503.198504: evt=bpf-output/no-inherit/: ffffffff810e0ba1 sys_nanosleep (/lib/modules/4.3.0....
usleep 14942 92503.298562: evt=bpf-output/no-inherit/: ffffffff810585e9 kretprobe_trampoline_holder (/lib....

# ./perf data convert --to-ctf ./out.ctf
[ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ]
[ perf data convert: Converted and wrote 0.000 MB (2 samples) ]

# babeltrace ./out.ctf
[01:41:43.198504134] (+?.?????????) evt=bpf-output/no-inherit/: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E0BA1, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x32C0C07B, [1] = 0x5421, [2] = 0x1 ] }
[01:41:43.298562257] (+0.100058123) evt=bpf-output/no-inherit/: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810585E9, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x38B77FAA, [1] = 0x5421, [2] = 0x2 ] }

# cat ./test_bpf_output_2.py
from babeltrace import TraceCollection
tc = TraceCollection(
tc.add_trace('./out.ctf', 'ctf')
d = {1:[], 2:[]}
for event in tc.events:
if not event.name.startswith('evt=bpf-output/no-inherit/'):
continue
raw_data = event['raw_data']
(time, type) = ((raw_data[0] + (raw_data[1] << 32)), raw_data[2])
d[type].append(time)
print(list(map(lambda i: d[2][i] - d[1][i], range(len(d[1]))))));

# python3 ./test_bpf_output_2.py
[100056879]

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Brendan Gregg <***@gmail.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Jiri Olsa <***@kernel.org>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/data-convert-bt.c | 115 +++++++++++++++++++++++++++++++++++++-
1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 34cd1e4..1fb472b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw,
return ret;
}

+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_sample *sample)
+{
+ struct bt_ctf_field_type *len_type, *seq_type;
+ struct bt_ctf_field *len_field, *seq_field;
+ unsigned int raw_size = sample->raw_size;
+ unsigned int nr_elements = raw_size / sizeof(u32);
+ unsigned int i;
+ int ret;
+
+ if (nr_elements * sizeof(u32) != raw_size)
+ pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+ raw_size, nr_elements * sizeof(u32) - raw_size);
+
+ len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+ len_field = bt_ctf_field_create(len_type);
+ if (!len_field) {
+ pr_err("failed to create 'raw_len' for bpf output event\n");
+ ret = -1;
+ goto put_len_type;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+ if (ret) {
+ pr_err("failed to set field value for raw_len\n");
+ goto put_len_field;
+ }
+ ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+ if (ret) {
+ pr_err("failed to set payload to raw_len\n");
+ goto put_len_field;
+ }
+
+ seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+ seq_field = bt_ctf_field_create(seq_type);
+ if (!seq_field) {
+ pr_err("failed to create 'raw_data' for bpf output event\n");
+ ret = -1;
+ goto put_seq_type;
+ }
+
+ ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+ if (ret) {
+ pr_err("failed to set length of 'raw_data'\n");
+ goto put_seq_field;
+ }
+
+ for (i = 0; i < nr_elements; i++) {
+ struct bt_ctf_field *elem_field =
+ bt_ctf_field_sequence_get_field(seq_field, i);
+
+ ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+ ((u32 *)(sample->raw_data))[i]);
+
+ bt_ctf_field_put(elem_field);
+ if (ret) {
+ pr_err("failed to set raw_data[%d]\n", i);
+ goto put_seq_field;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+ if (ret)
+ pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+ bt_ctf_field_put(seq_field);
+put_seq_type:
+ bt_ctf_field_type_put(seq_type);
+put_len_field:
+ bt_ctf_field_put(len_field);
+put_len_type:
+ bt_ctf_field_type_put(len_type);
+ return ret;
+}
+
static int add_generic_values(struct ctf_writer *cw,
struct bt_ctf_event *event,
struct perf_evsel *evsel,
@@ -597,6 +675,13 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}

+ if ((evsel->attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->attr.config == PERF_COUNT_SW_BPF_OUTPUT)) {
+ ret = add_bpf_output_values(event_class, event, sample);
+ if (ret)
+ return -1;
+ }
+
cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
if (cs) {
if (is_flush_needed(cs))
@@ -744,6 +829,25 @@ static int add_tracepoint_types(struct ctf_writer *cw,
return ret;
}

+static int add_bpf_output_types(struct ctf_writer *cw,
+ struct bt_ctf_event_class *class)
+{
+ struct bt_ctf_field_type *len_type = cw->data.u32;
+ struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+ struct bt_ctf_field_type *seq_type;
+ int ret;
+
+ ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+ if (ret)
+ return ret;
+
+ seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+ if (!seq_type)
+ return -1;
+
+ return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
struct bt_ctf_event_class *event_class)
{
@@ -755,7 +859,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
* ctf event header
* PERF_SAMPLE_READ - TODO
* PERF_SAMPLE_CALLCHAIN - TODO
- * PERF_SAMPLE_RAW - tracepoint fields are handled separately
+ * PERF_SAMPLE_RAW - tracepoint fields and BPF output
+ * are handled separately
* PERF_SAMPLE_BRANCH_STACK - TODO
* PERF_SAMPLE_REGS_USER - TODO
* PERF_SAMPLE_STACK_USER - TODO
@@ -824,6 +929,14 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
goto err;
}

+ if ((evsel->attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->attr.config == PERF_COUNT_SW_BPF_OUTPUT)) {
+ ret = add_bpf_output_types(cw, event_class);
+ if (ret)
+ goto err;
+
+ }
+
ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
if (ret) {
pr("Failed to add event class into stream.\n");
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:27:52 UTC
Permalink
Commit a43eec304259a6c637f4014a6d4767159b6a3aa3 (bpf: introduce
bpf_perf_event_output() helper) add a helper to enable BPF program
output data to perf ring buffer through a new type of perf event
PERF_COUNT_SW_BPF_OUTPUT. This patch enable perf to create perf
event of that type. Now perf user can use following cmdline to
receive output data from BPF programs:

# ./perf record -a -e evt=bpf-output/no-inherit/ \
-e ./test_bpf_output.c/maps:channel.event=evt/ ls /
# ./perf script
perf 12927 [004] 355971.129276: 0 evt=bpf-output/no-inherit/: ffffffff811ed5f1 sys_write
perf 12927 [004] 355971.129279: 0 evt=bpf-output/no-inherit/: ffffffff811ed5f1 sys_write
...

Test result:
# cat ./test_bpf_output.c
/************************ BEGIN **************************/
typedef int u32;
typedef unsigned long long u64;

enum bpf_map_type {
BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,
};

struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};

#define SEC(NAME) __attribute__((section(NAME), used))
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
(void *)23;

struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};

SEC("func_write=sys_write")
int func_write(void *ctx)
{
struct {
u64 ktime;
int cpuid;
} __attribute__((packed)) output_data;
char error_data[] = "Error: failed to output\n";

output_data.cpuid = bpf_get_smp_processor_id();
output_data.ktime = bpf_ktime_get_ns();
int err = bpf_perf_event_output(ctx, &channel, bpf_get_smp_processor_id(),
&output_data, sizeof(output_data));
if (err)
bpf_trace_printk(error_data, sizeof(error_data));
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************ END ***************************/

# ./perf record -a -e evt=bpf-output/no-inherit/ \
-e ./test_bpf_output.c/maps:channel.event=evt/ ls /
# ./perf script | grep ls
ls 4085 [000] 2746114.230215: evt=bpf-output/no-inherit/: ffffffff811ed5f1 sys_write (/lib/modules/4.3.0-rc4+/build/vmlinux)
ls 4085 [000] 2746114.230244: evt=bpf-output/no-inherit/: ffffffff811ed5f1 sys_write (/lib/modules/4.3.0-rc4+/build/vmlinux)

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Brendan Gregg <***@gmail.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/evsel.c | 6 ++++++
tools/perf/util/parse-events.l | 1 +
2 files changed, 7 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8e0e6f4..82ece81 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -224,6 +224,12 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
if (evsel != NULL)
perf_evsel__init(evsel, attr, idx);

+ if ((evsel->attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->attr.config == PERF_COUNT_SW_BPF_OUTPUT)) {
+ evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+ evsel->attr.sample_period = 1;
+ }
+
return evsel;
}

diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 8bb3437..27d567f 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -249,6 +249,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }

/*
* We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:28:08 UTC
Permalink
Add hexdamical u32 to base data type, which is useful for raw output
because raw data are u32 aligned.

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Brendan Gregg <***@gmail.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Jiri Olsa <***@kernel.org>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/data-convert-bt.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5bfc119..34cd1e4 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -63,6 +63,7 @@ struct ctf_writer {
struct bt_ctf_field_type *s32;
struct bt_ctf_field_type *u32;
struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
struct bt_ctf_field_type *u64_hex;
};
struct bt_ctf_field_type *array[6];
@@ -982,6 +983,7 @@ do { \
CREATE_INT_TYPE(cw->data.u64, 64, false, false);
CREATE_INT_TYPE(cw->data.s32, 32, true, false);
CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);

cw->data.string = bt_ctf_field_type_string_create();
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
tip-bot for Wang Nan
2015-12-14 08:39:01 UTC
Permalink
Commit-ID: 26812d466b2633d0c772fe3aca954129f150d3cb
Gitweb: http://git.kernel.org/tip/26812d466b2633d0c772fe3aca954129f150d3cb
Author: Wang Nan <***@huawei.com>
AuthorDate: Tue, 8 Dec 2015 02:25:39 +0000
Committer: Arnaldo Carvalho de Melo <***@redhat.com>
CommitDate: Fri, 11 Dec 2015 09:12:09 -0300

perf data: Add u32_hex data type

Add hexadecimal u32 to base data type, which is useful for raw output
because raw data is u32 aligned.

Signed-off-by: Wang Nan <***@huawei.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Brendan Gregg <***@gmail.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Jiri Olsa <***@kernel.org>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
Link: http://lkml.kernel.org/r/1449541544-67621-12-git-send-email-***@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/perf/util/data-convert-bt.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5bfc119..34cd1e4 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -63,6 +63,7 @@ struct ctf_writer {
struct bt_ctf_field_type *s32;
struct bt_ctf_field_type *u32;
struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
struct bt_ctf_field_type *u64_hex;
};
struct bt_ctf_field_type *array[6];
@@ -982,6 +983,7 @@ do { \
CREATE_INT_TYPE(cw->data.u64, 64, false, false);
CREATE_INT_TYPE(cw->data.s32, 32, true, false);
CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);

cw->data.string = bt_ctf_field_type_string_create();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:28:25 UTC
Permalink
This patch keeps options of perf builtins same in all conditions. If
one option is disabled because of compiling options, users should be
notified.

Masami suggested another implementation in [1] that, by adding a
OPTION_NEXT_DEPENDS option before those options in the 'struct option'
array, options parser knows an option is disabled. However, in some
cases this array is reordered (options__order()). In addition, in
parse-option.c that array is const, so we can't simply merge
information in decorator option into the affacted option.

This patch chooses a simpler implementation that, introducing a
set_option_nobuild() function and two option parsing flags. Builtins
with such options should call set_option_nobuild() before option
parsing. The complexity of this patch is because we want some of options
can be skipped safely. In this case their arguments should also be
consumed.

Options in 'perf record' and 'perf probe' are fixed in this patch.

[1] http://lkml.kernel.org/g/***@GSjpTKYDCembx32.service.hitachi.net

Test result:

Normal case:

# ./perf probe --vmlinux /tmp/vmlinux sys_write
Added new event:
probe:sys_write (on sys_write)

You can now use it in all perf tools, such as:

perf record -e probe:sys_write -aR sleep 1


Build with NO_DWARF=1:

# ./perf probe -L sys_write
Error: switch `L' is not built because NO_DWARF=1

Usage: perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]
or: perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]
or: perf probe [<options>] --del '[GROUP:]EVENT' ...
or: perf probe --list [GROUP:]EVENT ...
or: perf probe [<options>] --funcs

-L, --line <FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]>
Show source code lines.
(not build because NO_DWARF=1)

# ./perf probe -k /tmp/vmlinux sys_write
Warning: switch `k' is not built because NO_DWARF=1
Added new event:
probe:sys_write (on sys_write)

You can now use it in all perf tools, such as:

perf record -e probe:sys_write -aR sleep 1

# ./perf probe --vmlinux /tmp/vmlinux sys_write
Warning: option `vmlinux' is not built because NO_DWARF=1
Added new event:
[SNIP]

# ./perf probe -l
Usage: perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]
or: perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]
..
-k, --vmlinux <file> vmlinux pathname
(not build because NO_DWARF=1)
-L, --line <FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]>
Show source code lines.
(not build because NO_DWARF=1)
..
-V, --vars <FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT>
Show accessible variables on PROBEDEF
(not build because NO_DWARF=1)
--externs Show external variables too (with --vars only)
(not build because NO_DWARF=1)
--no-inlines Don't search inlined functions
(not build because NO_DWARF=1)
--range Show variables location range in scope (with --vars only)
(not build because NO_DWARF=1)

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/builtin-probe.c | 15 +++++-
tools/perf/builtin-record.c | 9 +++-
tools/perf/util/parse-options.c | 113 ++++++++++++++++++++++++++++++++++++----
tools/perf/util/parse-options.h | 5 ++
4 files changed, 129 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 132afc9..dbe2ea5 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -249,6 +249,9 @@ static int opt_show_vars(const struct option *opt,

return ret;
}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
#endif
static int opt_add_probe_event(const struct option *opt,
const char *str, int unset __maybe_unused)
@@ -473,7 +476,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
-#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
@@ -490,7 +492,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"directory", "path to kernel source"),
OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
"Don't search inlined functions"),
-#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
"Set how many probe points can be found for a probe."),
@@ -521,6 +522,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
#ifdef HAVE_DWARF_SUPPORT
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+ set_nobuild('L', "line", false);
+ set_nobuild('V', "vars", false);
+ set_nobuild('\0', "externs", false);
+ set_nobuild('\0', "range", false);
+ set_nobuild('k', "vmlinux", true);
+ set_nobuild('s', "source", true);
+ set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
#endif
set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8479821..11bf32d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1124,12 +1124,10 @@ struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
-#ifdef HAVE_LIBBPF_SUPPORT
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
"options passed to clang when compiling BPF scriptlets"),
-#endif
OPT_END()
};

@@ -1141,6 +1139,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
struct record *rec = &record;
char errbuf[BUFSIZ];

+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+ set_nobuild('\0', "clang-path", true);
+ set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 9fca092..105e357 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -18,20 +18,34 @@ static int opterror(const struct option *opt, const char *reason, int flags)
return error("option `%s' %s", opt->long_name, reason);
}

+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ warning("switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ warning("option `no-%s' %s", opt->long_name, reason);
+ else
+ warning("option `%s' %s", opt->long_name, reason);
+}
+
static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
int flags, const char **arg)
{
+ const char *res;
+
if (p->opt) {
- *arg = p->opt;
+ res = p->opt;
p->opt = NULL;
} else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
**(p->argv + 1) == '-')) {
- *arg = (const char *)opt->defval;
+ res = (const char *)opt->defval;
} else if (p->argc > 1) {
p->argc--;
- *arg = *++p->argv;
+ res = *++p->argv;
} else
return opterror(opt, "requires a value", flags);
+ if (arg)
+ *arg = res;
return 0;
}

@@ -91,6 +105,59 @@ static int get_value(struct parse_opt_ctx_t *p,
}
}

+ if (opt->flags & PARSE_OPT_NOBUILD) {
+ char reason[128];
+ bool noarg = false;
+
+ err = snprintf(reason, sizeof(reason),
+ "is not built because %s",
+ opt->build_opt);
+ reason[sizeof(reason) - 1] = '\0';
+
+ if (err < 0)
+ strncpy(reason, "is not built", sizeof(reason));
+
+ if (!(opt->flags & PARSE_OPT_CANSKIP))
+ return opterror(opt, reason, flags);
+
+ err = 0;
+ if (unset)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_NOARG)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ noarg = true;
+
+ switch (opt->type) {
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_BIT:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ noarg = true;
+ break;
+ case OPTION_CALLBACK:
+ case OPTION_STRING:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ case OPTION_LONG:
+ case OPTION_U64:
+ default:
+ break;
+ }
+
+ if (!noarg)
+ err = get_arg(p, opt, flags, NULL);
+ if (err)
+ return err;
+
+ optwarning(opt, reason, flags);
+ return 0;
+ }
+
switch (opt->type) {
case OPTION_BIT:
if (unset)
@@ -647,6 +714,10 @@ static void print_option_help(const struct option *opts, int full)
pad = USAGE_OPTS_WIDTH;
}
fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+ if (opts->flags & PARSE_OPT_NOBUILD)
+ fprintf(stderr, "%*s(not build because %s)\n",
+ USAGE_OPTS_WIDTH + USAGE_GAP, "",
+ opts->build_opt);
}

static int option__cmp(const void *va, const void *vb)
@@ -853,15 +924,39 @@ int parse_opt_verbosity_cb(const struct option *opt,
return 0;
}

-void set_option_flag(struct option *opts, int shortopt, const char *longopt,
- int flag)
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
{
for (; opts->type != OPTION_END; opts++) {
if ((shortopt && opts->short_name == shortopt) ||
(opts->long_name && longopt &&
- !strcmp(opts->long_name, longopt))) {
- opts->flags |= flag;
- break;
- }
+ !strcmp(opts->long_name, longopt)))
+ return opts;
}
+ return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+ int flag)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (opt)
+ opt->flags |= flag;
+ return;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+ const char *longopt,
+ const char *build_opt,
+ bool can_skip)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (!opt)
+ return;
+
+ opt->flags |= PARSE_OPT_NOBUILD;
+ opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+ opt->build_opt = build_opt;
}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index a8e407b..2cac2aa 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -41,6 +41,8 @@ enum parse_opt_option_flags {
PARSE_OPT_DISABLED = 32,
PARSE_OPT_EXCLUSIVE = 64,
PARSE_OPT_NOEMPTY = 128,
+ PARSE_OPT_NOBUILD = 256,
+ PARSE_OPT_CANSKIP = 512,
};

struct option;
@@ -96,6 +98,7 @@ struct option {
void *value;
const char *argh;
const char *help;
+ const char *build_opt;

int flags;
parse_opt_cb *callback;
@@ -226,4 +229,6 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
extern const char *parse_options_fix_filename(const char *prefix, const char *file);

void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+ const char *build_opt, bool can_skip);
#endif /* __PERF_PARSE_OPTIONS_H */
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Arnaldo Carvalho de Melo
2015-12-11 12:40:37 UTC
Permalink
Post by Wang Nan
This patch keeps options of perf builtins same in all conditions. If
one option is disabled because of compiling options, users should be
notified.
Masami suggested another implementation in [1] that, by adding a
OPTION_NEXT_DEPENDS option before those options in the 'struct option'
array, options parser knows an option is disabled. However, in some
cases this array is reordered (options__order()). In addition, in
parse-option.c that array is const, so we can't simply merge
information in decorator option into the affacted option.
This patch chooses a simpler implementation that, introducing a
set_option_nobuild() function and two option parsing flags. Builtins
with such options should call set_option_nobuild() before option
parsing. The complexity of this patch is because we want some of options
can be skipped safely. In this case their arguments should also be
consumed.
Options in 'perf record' and 'perf probe' are fixed in this patch.
# ./perf probe --vmlinux /tmp/vmlinux sys_write
probe:sys_write (on sys_write)
perf record -e probe:sys_write -aR sleep 1
# ./perf probe -L sys_write
Error: switch `L' is not built because NO_DWARF=1
This should be:

Error: switch `L' is not built-in because NO_DWARF=1

But it would be better as:

Error: switch `L' is not available because NO_DWARF=1

What makes this an error is the fact that this option doesn't have that
CAN_SKIP flag, right? I.e. this SKIP flag determines the error/warning
message, for errors this should be "... is not available ..." for
warnings it should instead be "... is being ignored ...".
Post by Wang Nan
Usage: perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]
or: perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]
or: perf probe [<options>] --del '[GROUP:]EVENT' ...
or: perf probe --list [GROUP:]EVENT ...
or: perf probe [<options>] --funcs
-L, --line <FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]>
Show source code lines.
(not build because NO_DWARF=1)
# ./perf probe -k /tmp/vmlinux sys_write
Warning: switch `k' is not built because NO_DWARF=1
probe:sys_write (on sys_write)
perf record -e probe:sys_write -aR sleep 1
# ./perf probe --vmlinux /tmp/vmlinux sys_write
Warning: option `vmlinux' is not built because NO_DWARF=1
[SNIP]
# ./perf probe -l
Usage: perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]
or: perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]
...
-k, --vmlinux <file> vmlinux pathname
(not build because NO_DWARF=1)
-L, --line <FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]>
Show source code lines.
(not build because NO_DWARF=1)
...
Show accessible variables on PROBEDEF
(not build because NO_DWARF=1)
--externs Show external variables too (with --vars only)
(not build because NO_DWARF=1)
--no-inlines Don't search inlined functions
(not build because NO_DWARF=1)
--range Show variables location range in scope (with --vars only)
(not build because NO_DWARF=1)
---
tools/perf/builtin-probe.c | 15 +++++-
tools/perf/builtin-record.c | 9 +++-
tools/perf/util/parse-options.c | 113 ++++++++++++++++++++++++++++++++++++----
tools/perf/util/parse-options.h | 5 ++
4 files changed, 129 insertions(+), 13 deletions(-)
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 132afc9..dbe2ea5 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -249,6 +249,9 @@ static int opt_show_vars(const struct option *opt,
return ret;
}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
#endif
static int opt_add_probe_event(const struct option *opt,
const char *str, int unset __maybe_unused)
@@ -473,7 +476,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
-#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
@@ -490,7 +492,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"directory", "path to kernel source"),
OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
"Don't search inlined functions"),
-#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
"Set how many probe points can be found for a probe."),
@@ -521,6 +522,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
#ifdef HAVE_DWARF_SUPPORT
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+ set_nobuild('L', "line", false);
+ set_nobuild('V', "vars", false);
+ set_nobuild('\0', "externs", false);
+ set_nobuild('\0', "range", false);
+ set_nobuild('k', "vmlinux", true);
+ set_nobuild('s', "source", true);
+ set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
#endif
set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8479821..11bf32d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1124,12 +1124,10 @@ struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
-#ifdef HAVE_LIBBPF_SUPPORT
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
"options passed to clang when compiling BPF scriptlets"),
-#endif
OPT_END()
};
@@ -1141,6 +1139,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
struct record *rec = &record;
char errbuf[BUFSIZ];
+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+ set_nobuild('\0', "clang-path", true);
+ set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 9fca092..105e357 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -18,20 +18,34 @@ static int opterror(const struct option *opt, const char *reason, int flags)
return error("option `%s' %s", opt->long_name, reason);
}
+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ warning("switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ warning("option `no-%s' %s", opt->long_name, reason);
+ else
+ warning("option `%s' %s", opt->long_name, reason);
+}
+
static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
int flags, const char **arg)
{
+ const char *res;
+
if (p->opt) {
- *arg = p->opt;
+ res = p->opt;
p->opt = NULL;
} else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
**(p->argv + 1) == '-')) {
- *arg = (const char *)opt->defval;
+ res = (const char *)opt->defval;
} else if (p->argc > 1) {
p->argc--;
- *arg = *++p->argv;
+ res = *++p->argv;
} else
return opterror(opt, "requires a value", flags);
+ if (arg)
+ *arg = res;
return 0;
}
@@ -91,6 +105,59 @@ static int get_value(struct parse_opt_ctx_t *p,
}
}
+ if (opt->flags & PARSE_OPT_NOBUILD) {
+ char reason[128];
+ bool noarg = false;
+
+ err = snprintf(reason, sizeof(reason),
+ "is not built because %s",
+ opt->build_opt);
+ reason[sizeof(reason) - 1] = '\0';
+
+ if (err < 0)
+ strncpy(reason, "is not built", sizeof(reason));
+
+ if (!(opt->flags & PARSE_OPT_CANSKIP))
+ return opterror(opt, reason, flags);
+
+ err = 0;
+ if (unset)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_NOARG)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ noarg = true;
+
+ switch (opt->type) {
+ noarg = true;
+ break;
+ break;
+ }
+
+ if (!noarg)
+ err = get_arg(p, opt, flags, NULL);
+ if (err)
+ return err;
+
+ optwarning(opt, reason, flags);
+ return 0;
+ }
+
switch (opt->type) {
if (unset)
@@ -647,6 +714,10 @@ static void print_option_help(const struct option *opts, int full)
pad = USAGE_OPTS_WIDTH;
}
fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+ if (opts->flags & PARSE_OPT_NOBUILD)
+ fprintf(stderr, "%*s(not build because %s)\n",
+ USAGE_OPTS_WIDTH + USAGE_GAP, "",
+ opts->build_opt);
}
static int option__cmp(const void *va, const void *vb)
@@ -853,15 +924,39 @@ int parse_opt_verbosity_cb(const struct option *opt,
return 0;
}
-void set_option_flag(struct option *opts, int shortopt, const char *longopt,
- int flag)
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
{
for (; opts->type != OPTION_END; opts++) {
if ((shortopt && opts->short_name == shortopt) ||
(opts->long_name && longopt &&
- !strcmp(opts->long_name, longopt))) {
- opts->flags |= flag;
- break;
- }
+ !strcmp(opts->long_name, longopt)))
+ return opts;
}
+ return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+ int flag)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (opt)
+ opt->flags |= flag;
+ return;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+ const char *longopt,
+ const char *build_opt,
+ bool can_skip)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (!opt)
+ return;
+
+ opt->flags |= PARSE_OPT_NOBUILD;
+ opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+ opt->build_opt = build_opt;
}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index a8e407b..2cac2aa 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -41,6 +41,8 @@ enum parse_opt_option_flags {
PARSE_OPT_DISABLED = 32,
PARSE_OPT_EXCLUSIVE = 64,
PARSE_OPT_NOEMPTY = 128,
+ PARSE_OPT_NOBUILD = 256,
+ PARSE_OPT_CANSKIP = 512,
};
struct option;
@@ -96,6 +98,7 @@ struct option {
void *value;
const char *argh;
const char *help;
+ const char *build_opt;
int flags;
parse_opt_cb *callback;
@@ -226,4 +229,6 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
extern const char *parse_options_fix_filename(const char *prefix, const char *file);
void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+ const char *build_opt, bool can_skip);
#endif /* __PERF_PARSE_OPTIONS_H */
--
1.8.3.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:28:38 UTC
Permalink
There are so many test cases use stack allocated 'struct machine'.
Including:
test__hists_link
test__hists_filter
test__mmap_thread_lookup
test__thread_mg_share
test__hists_output
test__hists_cumulate

Also, in non-test code (for example, machine__new_host()) there are
code use 'malloc()' to alloc struct machine.

These are dangerous operations, cause some tests fail or hung in
machines__exit(). For example, in

machines__exit ->
machine__destroy_kernel_maps ->
map_groups__remove ->
maps__remove ->
pthread_rwlock_wrlock

a incorrectly initialized lock causes unintended behavior.

This patch bzero() that structure in machine__init() to ensure all
fields in 'struct machine' are initialized to zero.

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Jiri Olsa <***@redhat.com>
Cc: Namhyung Kim <***@kernel.org>
---
tools/perf/util/machine.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index bfc289c..188cfdf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos)

int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ bzero(machine, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
tip-bot for Wang Nan
2015-12-14 08:39:39 UTC
Permalink
Commit-ID: 93b0ba3c60da89043ce2b9f601cd2b3da408903b
Gitweb: http://git.kernel.org/tip/93b0ba3c60da89043ce2b9f601cd2b3da408903b
Author: Wang Nan <***@huawei.com>
AuthorDate: Tue, 8 Dec 2015 02:25:44 +0000
Committer: Arnaldo Carvalho de Melo <***@redhat.com>
CommitDate: Fri, 11 Dec 2015 09:32:41 -0300

perf tools: Clear struct machine during machine__init()

There are so many test cases use stack allocated 'struct machine'.
Including:
test__hists_link
test__hists_filter
test__mmap_thread_lookup
test__thread_mg_share
test__hists_output
test__hists_cumulate

Also, in non-test code (for example, machine__new_host()) there are
code use 'malloc()' to alloc struct machine.

These are dangerous operations, cause some tests fail or hung in
machines__exit(). For example, in

machines__exit ->
machine__destroy_kernel_maps ->
map_groups__remove ->
maps__remove ->
pthread_rwlock_wrlock

a incorrectly initialized lock causes unintended behavior.

This patch memset(0) that structure in machine__init() to ensure all
fields in 'struct machine' are initialized to zero.

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Jiri Olsa <***@redhat.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
Link: http://lkml.kernel.org/r/1449541544-67621-17-git-send-email-***@huawei.com
[ Use memset, see 'man bzero' ]
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/perf/util/machine.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f5882b8..1407d51 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos)

int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Wang Nan
2015-12-08 02:28:50 UTC
Permalink
bpf__config_obj() is introduced as a core API to config BPF object
after loading. One configuration option of maps is introduced. After
this patch BPF object can accept configuration like:

maps:my_map.value=1234

(maps.my_map.value looks pretty. However, there's a small but hard
to fixed problem related to flex's greedy matching. Please see [1].
Choose ':' to avoid it in a simpler way.)

This patch is more complex than the work it really does because the
consideration of extension. In designing of BPF map configuration,
following things should be considered:

1. Array indices selection: perf should allow user setting different
value to different slots in an array, with syntax like:
maps:my_map.value[0,3...6]=1234;

2. A map can be config by different config terms, each for a part
of it. For example, set each slot to pid of a thread;

3. Type of value: integer is not the only valid value type. Perf
event can also be put into a map after commit 35578d7984003097af2b1e3
(bpf: Implement function bpf_perf_event_read() that get the selected
hardware PMU conuter);

4. For hash table, it is possible to use string or other as key;

5. It is possible that map configuration is unable to be setup
during parsing. Perf event is an example.

Therefore, this patch does following:

1. Instead of updating map element during parsing, this patch stores
map config options in 'struct bpf_map_priv'. Following patches
would apply those configs at proper time;

2. Link map operations to a list so a map can have multiple config
terms attached, so different parts can be configured separately;

3. Make 'struct bpf_map_priv' extensible so following patches can
add new types of keys and operations;

4. Use bpf_config_map_funcs array to support more maps config options.

Since the patch changing event parser to parse BPF object config is
relative large, I put in another commit. Code in this patch
could be tested after applying next patch.

[1] http://lkml.kernel.org/g/***@huawei.com

Signed-off-by: Wang Nan <***@huawei.com>
Signed-off-by: He Kuang <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/bpf-loader.c | 266 +++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/bpf-loader.h | 38 +++++++
2 files changed, 304 insertions(+)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 540a7ef..7d361aa 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -739,6 +739,251 @@ int bpf__foreach_tev(struct bpf_object *obj,
return 0;
}

+enum bpf_map_op_type {
+ BPF_MAP_OP_SET_VALUE,
+};
+
+enum bpf_map_key_type {
+ BPF_MAP_KEY_ALL,
+};
+
+struct bpf_map_op {
+ struct list_head list;
+ enum bpf_map_op_type op_type;
+ enum bpf_map_key_type key_type;
+ union {
+ u64 value;
+ } v;
+};
+
+struct bpf_map_priv {
+ struct list_head ops_list;
+};
+
+static void
+bpf_map_op__free(struct bpf_map_op *op)
+{
+ struct list_head *list = &op->list;
+ /*
+ * bpf_map_op__free() needs to consider following cases:
+ * 1. When the op is created but not linked to any list:
+ * impossible. This only happen in bpf_map_op__alloc()
+ * and it would be freed directly;
+ * 2. Normal case, when the op is linked to a list;
+ * 3. After the op has already be removed.
+ * Thanks to list.h, if it has removed by list_del() then
+ * list->{next,prev} should have been set to LIST_POISON{1,2}.
+ */
+ if ((list->next != LIST_POISON1) && (list->prev != LIST_POISON2))
+ list_del(list);
+ free(op);
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+ void *_priv)
+{
+ struct bpf_map_priv *priv = _priv;
+ struct bpf_map_op *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &priv->ops_list, list)
+ bpf_map_op__free(pos);
+ free(priv);
+}
+
+static struct bpf_map_op *
+bpf_map_op__alloc(struct bpf_map *map)
+{
+ struct bpf_map_op *op;
+ struct bpf_map_priv *priv;
+ const char *map_name;
+ int err;
+
+ map_name = bpf_map__get_name(map);
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err) {
+ pr_debug("Failed to get private from map %s\n", map_name);
+ return ERR_PTR(err);
+ }
+
+ if (!priv) {
+ priv = zalloc(sizeof(*priv));
+ if (!priv) {
+ pr_debug("No enough memory to alloc map private\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_LIST_HEAD(&priv->ops_list);
+
+ if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+ free(priv);
+ return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
+ }
+ }
+
+ op = zalloc(sizeof(*op));
+ if (!op) {
+ pr_debug("Failed to alloc bpf_map_op\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ op->key_type = BPF_MAP_KEY_ALL;
+ list_add_tail(&op->list, &priv->ops_list);
+ return op;
+}
+
+static int
+bpf__obj_config_map_array_value(struct bpf_map *map,
+ struct parse_events_term *term)
+{
+ struct bpf_map_def def;
+ struct bpf_map_op *op;
+ const char *map_name;
+ int err;
+
+ map_name = bpf_map__get_name(map);
+
+ err = bpf_map__get_def(map, &def);
+ if (err) {
+ pr_debug("Unable to get map definition from '%s'\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (def.type != BPF_MAP_TYPE_ARRAY) {
+ pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+ if (def.key_size < sizeof(unsigned int)) {
+ pr_debug("Map %s has incorrect key size\n", map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+ }
+ switch (def.value_size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ pr_debug("Map %s has incorrect value size\n", map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+ }
+
+ op = bpf_map_op__alloc(map);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+ op->op_type = BPF_MAP_OP_SET_VALUE;
+ op->v.value = term->val.num;
+ return 0;
+}
+
+static int
+bpf__obj_config_map_value(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ if (!term->err_val) {
+ pr_debug("Config value not set\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+ }
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+ return bpf__obj_config_map_array_value(map, term);
+
+ pr_debug("ERROR: wrong value type\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+}
+
+struct bpf_obj_config_map_func {
+ const char *config_opt;
+ int (*config_func)(struct bpf_map *, struct parse_events_term *,
+ struct perf_evlist *);
+};
+
+struct bpf_obj_config_map_func bpf_obj_config_map_funcs[] = {
+ {"value", bpf__obj_config_map_value},
+};
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *key_scan_pos)
+{
+ /* key is "maps:<mapname>.<config opt>" */
+ char *map_name = strdup(term->config + sizeof("maps:") - 1);
+ struct bpf_map *map;
+ int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+ char *map_opt;
+ size_t i;
+
+ if (!map_name)
+ return -ENOMEM;
+
+ map_opt = strchr(map_name, '.');
+ if (!map_opt) {
+ pr_debug("ERROR: Invalid map config: %s\n", map_name);
+ goto out;
+ }
+
+ *map_opt++ = '\0';
+ if (*map_opt == '\0') {
+ pr_debug("ERROR: Invalid map option: %s\n", term->config);
+ goto out;
+ }
+
+ map = bpf_object__get_map_by_name(obj, map_name);
+ if (!map) {
+ pr_debug("ERROR: Map %s is not exist\n", map_name);
+ err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+ goto out;
+ }
+
+ *key_scan_pos += map_opt - map_name;
+ for (i = 0; i < ARRAY_SIZE(bpf_obj_config_map_funcs); i++) {
+ struct bpf_obj_config_map_func *func =
+ &bpf_obj_config_map_funcs[i];
+
+ if (strcmp(map_opt, func->config_opt) == 0) {
+ err = func->config_func(map, term, evlist);
+ goto out;
+ }
+ }
+
+ pr_debug("ERROR: invalid config option '%s' for maps\n",
+ map_opt);
+ err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+ free(map_name);
+ if (!err)
+ key_scan_pos += strlen(map_opt);
+ return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *error_pos)
+{
+ int key_scan_pos = 0;
+ int err;
+
+ if (!obj || !term || !term->config)
+ return -EINVAL;
+
+ if (!prefixcmp(term->config, "maps:")) {
+ key_scan_pos = sizeof("maps:") - 1;
+ err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+ goto out;
+ }
+ err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+ if (error_pos)
+ *error_pos = key_scan_pos;
+ return err;
+
+}
+
#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -753,6 +998,14 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
[ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
[ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue",
+ [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option",
+ [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (lost '=')",
+ [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object maps config option",
+ [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map not exist",
+ [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map",
+ [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
+ [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
+ [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
};

static int
@@ -872,3 +1125,16 @@ int bpf__strerror_load(struct bpf_object *obj,
bpf__strerror_end(buf, size);
return 0;
}
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused, int err,
+ char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+ "Can't use this config term to this type of map");
+ bpf__strerror_end(buf, size);
+ return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 6fdc045..2464db9 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -10,6 +10,7 @@
#include <string.h>
#include <bpf/libbpf.h>
#include "probe-event.h"
+#include "evlist.h"
#include "debug.h"

enum bpf_loader_errno {
@@ -24,10 +25,19 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */
+ BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */
+ BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object maps config option */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
__BPF_LOADER_ERRNO__END,
};

struct bpf_object;
+struct parse_events_term;
#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"

typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
@@ -53,6 +63,14 @@ int bpf__strerror_load(struct bpf_object *obj, int err,
char *buf, size_t size);
int bpf__foreach_tev(struct bpf_object *obj,
bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+ struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *error_pos, int err, char *buf,
+ size_t size);
#else
static inline struct bpf_object *
bpf__prepare_load(const char *filename __maybe_unused,
@@ -84,6 +102,15 @@ bpf__foreach_tev(struct bpf_object *obj __maybe_unused,
}

static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused)
+{
+ return 0;
+}
+
+static inline int
__bpf_strerror(char *buf, size_t size)
{
if (!size)
@@ -118,5 +145,16 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
{
return __bpf_strerror(buf, size);
}
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused,
+ int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
#endif
#endif
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:29:18 UTC
Permalink
This patch introduces basic facilities to support config different
slots in a BPF map one by one.

array.nr_ranges and array.ranges are introduced into 'struct
parse_events_term', where ranges is an array of indices range (start,
length) which will be configured by this config term. nr_ranges
is the size of the array. The array is passed to 'struct bpf_map_priv'.
To indicate the new type of configuration, BPF_MAP_KEY_RANGES is
added as a new key type. bpf_map_config_foreach_key() is extended to
iterate over those indices instead of all possible keys.

Code in this commit will be enabled by following commit which enables
the indices syntax for array configuration.

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/bpf-loader.c | 132 ++++++++++++++++++++++++++++++++++++++---
tools/perf/util/bpf-loader.h | 1 +
tools/perf/util/parse-events.c | 33 ++++++++++-
tools/perf/util/parse-events.h | 12 ++++
4 files changed, 170 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 97f5efc..8d232b8 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -17,6 +17,7 @@
#include "llvm-utils.h"
#include "probe-event.h"
#include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
#include "llvm-utils.h"

#define DEFINE_PRINT_FN(name, level) \
@@ -747,6 +748,7 @@ enum bpf_map_op_type {

enum bpf_map_key_type {
BPF_MAP_KEY_ALL,
+ BPF_MAP_KEY_RANGES,
};

struct bpf_map_op {
@@ -754,6 +756,9 @@ struct bpf_map_op {
enum bpf_map_op_type op_type;
enum bpf_map_key_type key_type;
union {
+ struct parse_events_array array;
+ } k;
+ union {
u64 value;
struct perf_evsel *evsel;
} v;
@@ -779,6 +784,8 @@ bpf_map_op__free(struct bpf_map_op *op)
*/
if ((list->next != LIST_POISON1) && (list->prev != LIST_POISON2))
list_del(list);
+ if (op->key_type == BPF_MAP_KEY_RANGES)
+ parse_events__clear_array(&op->k.array);
free(op);
}

@@ -794,8 +801,30 @@ bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
free(priv);
}

+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term,
+ const char *map_name)
+{
+ op->key_type = BPF_MAP_KEY_ALL;
+
+ if (term->array.nr_ranges) {
+ size_t memsz = term->array.nr_ranges *
+ sizeof(op->k.array.ranges[0]);
+
+ op->k.array.ranges = memdup(term->array.ranges, memsz);
+ if (!op->k.array.ranges) {
+ pr_debug("No enough memory to alloc indices for %s\n",
+ map_name);
+ return -ENOMEM;
+ }
+ op->key_type = BPF_MAP_KEY_RANGES;
+ op->k.array.nr_ranges = term->array.nr_ranges;
+ }
+ return 0;
+}
+
static struct bpf_map_op *
-bpf_map_op__alloc(struct bpf_map *map)
+bpf_map_op__alloc(struct bpf_map *map, struct parse_events_term *term)
{
struct bpf_map_op *op;
struct bpf_map_priv *priv;
@@ -829,7 +858,12 @@ bpf_map_op__alloc(struct bpf_map *map)
return ERR_PTR(-ENOMEM);
}

- op->key_type = BPF_MAP_KEY_ALL;
+ err = bpf_map_op_setkey(op, term, map_name);
+ if (err) {
+ free(op);
+ return ERR_PTR(err);
+ }
+
list_add_tail(&op->list, &priv->ops_list);
return op;
}
@@ -872,7 +906,7 @@ bpf__obj_config_map_array_value(struct bpf_map *map,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
}

- op = bpf_map_op__alloc(map);
+ op = bpf_map_op__alloc(map, term);
if (IS_ERR(op))
return PTR_ERR(op);
op->op_type = BPF_MAP_OP_SET_VALUE;
@@ -933,7 +967,7 @@ bpf__obj_config_map_array_event(struct bpf_map *map,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}

- op = bpf_map_op__alloc(map);
+ op = bpf_map_op__alloc(map, term);
if (IS_ERR(op))
return PTR_ERR(op);

@@ -972,6 +1006,44 @@ struct bpf_obj_config_map_func bpf_obj_config_map_funcs[] = {
};

static int
+config_map_indices_range_check(struct parse_events_term *term,
+ struct bpf_map *map,
+ const char *map_name)
+{
+ struct parse_events_array *array = &term->array;
+ struct bpf_map_def def;
+ unsigned int i;
+ int err;
+
+ if (!array->nr_ranges)
+ return 0;
+ if (!array->ranges) {
+ pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+ map_name, (int)array->nr_ranges);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ err = bpf_map__get_def(map, &def);
+ if (err) {
+ pr_debug("ERROR: Unable to get map definition from '%s'\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ for (i = 0; i < array->nr_ranges; i++) {
+ unsigned int start = array->ranges[i].start;
+ size_t length = array->ranges[i].length;
+ unsigned int idx = start + length - 1;
+
+ if (idx >= def.max_entries) {
+ pr_debug("ERROR: index %d too large\n", idx);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+ }
+ }
+ return 0;
+}
+
+static int
bpf__obj_config_map(struct bpf_object *obj,
struct parse_events_term *term,
struct perf_evlist *evlist,
@@ -1007,6 +1079,13 @@ bpf__obj_config_map(struct bpf_object *obj,
}

*key_scan_pos += map_opt - map_name;
+
+ *key_scan_pos += strlen(map_opt);
+ err = config_map_indices_range_check(term, map, map_name);
+ if (err)
+ goto out;
+ *key_scan_pos -= strlen(map_opt);
+
for (i = 0; i < ARRAY_SIZE(bpf_obj_config_map_funcs); i++) {
struct bpf_obj_config_map_func *func =
&bpf_obj_config_map_funcs[i];
@@ -1077,6 +1156,33 @@ foreach_key_array_all(map_config_func_t func,
}

static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+ const char *name, int map_fd,
+ struct bpf_map_def *pdef,
+ struct bpf_map_op *op)
+{
+ unsigned int i, j;
+ int err;
+
+ for (i = 0; i < op->k.array.nr_ranges; i++) {
+ unsigned int start = op->k.array.ranges[i].start;
+ size_t length = op->k.array.ranges[i].length;
+
+ for (j = 0; j < length; j++) {
+ unsigned int idx = start + j;
+
+ err = func(name, map_fd, pdef, op, &idx, arg);
+ if (err) {
+ pr_debug("ERROR: failed to insert value to %s[%u]\n",
+ name, idx);
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
bpf_map_config_foreach_key(struct bpf_map *map,
map_config_func_t func,
void *arg)
@@ -1116,13 +1222,24 @@ bpf_map_config_foreach_key(struct bpf_map *map,
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
- return foreach_key_array_all(func, arg, name,
- map_fd, &def, op);
+ err = foreach_key_array_all(func, arg, name,
+ map_fd, &def, op);
+ if (err)
+ return err;
+ break;
+ case BPF_MAP_KEY_RANGES:
+ err = foreach_key_array_ranges(func, arg, name,
+ map_fd, &def,
+ op);
+ if (err)
+ return err;
+ break;
default:
pr_debug("ERROR: keytype for map '%s' invalid\n",
name);
return -BPF_LOADER_ERRNO__INTERNAL;
- }
+ }
+ break;
default:
pr_debug("ERROR: type of '%s' incorrect\n", name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1309,6 +1426,7 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
+ [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large",
};

static int
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index c9ce792..30ee519 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -38,6 +38,7 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */
__BPF_LOADER_ERRNO__END,
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5d682dd..af3d657 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2148,8 +2148,39 @@ void parse_events__free_terms(struct list_head *terms)
{
struct parse_events_term *term, *h;

- list_for_each_entry_safe(term, h, terms, list)
+ list_for_each_entry_safe(term, h, terms, list) {
+ if (term->array.nr_ranges)
+ free(term->array.ranges);
free(term);
+ }
+}
+
+int parse_events__merge_arrays(struct parse_events_array *dest,
+ struct parse_events_array *another)
+{
+ struct parse_events_array new;
+
+ if (!dest || !another)
+ return -EINVAL;
+
+ new.nr_ranges = dest->nr_ranges + another->nr_ranges;
+ new.ranges = malloc(sizeof(new.ranges[0]) * new.nr_ranges);
+ if (!new.ranges)
+ return -ENOMEM;
+
+ memcpy(&new.ranges[0], dest->ranges,
+ sizeof(new.ranges[0]) * dest->nr_ranges);
+ memcpy(&new.ranges[dest->nr_ranges], another->ranges,
+ sizeof(new.ranges[0]) * another->nr_ranges);
+ free(dest->ranges);
+ free(another->ranges);
+ *dest = new;
+ return 0;
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+ free(a->ranges);
}

void parse_events_evlist_error(struct parse_events_evlist *data,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 20ad3c2..c34615f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,8 +71,17 @@ enum {
PARSE_EVENTS__TERM_TYPE_INHERIT
};

+struct parse_events_array {
+ size_t nr_ranges;
+ struct {
+ unsigned int start;
+ size_t length;
+ } *ranges;
+};
+
struct parse_events_term {
char *config;
+ struct parse_events_array array;
union {
char *str;
u64 num;
@@ -117,6 +126,9 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
int parse_events_term__clone(struct parse_events_term **new,
struct parse_events_term *term);
void parse_events__free_terms(struct list_head *terms);
+int parse_events__merge_arrays(struct parse_events_array *dest,
+ struct parse_events_array *another);
+void parse_events__clear_array(struct parse_events_array *a);
int parse_events__modifier_event(struct list_head *list, char *str, bool add);
int parse_events__modifier_group(struct list_head *list, char *event_mod);
int parse_events_name(struct list_head *list, char *name);
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:29:45 UTC
Permalink
This patch introduce a new syntax to perf event parser:

# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...

By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.

Test result:

# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(unsigned char),
.max_entries = 100,
};
SEC("func=hrtimer_nanosleep rqtp->tv_nsec")
int func(void *ctx, int err, long nsec)
{
char fmt[] = "%ld\n";
long usec = nsec * 0x10624dd3 >> 38; // nsec / 1000
int key = (int)usec;
unsigned char *pval = map_lookup_elem(&channel, &key);

if (!pval)
return 0;
bpf_trace_printk(fmt, sizeof(fmt), (unsigned char)*pval);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/

Normal case:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0,1,2,3...5]=101/' usleep 2
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0...9,20...29]=102,maps:channel.value[10...19]=103/' usleep 3
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0...9,20...29]=102,maps:channel.value[10...19]=103/' usleep 15
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
usleep-655 [006] d... 2745434.122814: : 102
usleep-904 [006] d... 2745439.916264: : 103
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[all]=104/' usleep 99
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
usleep-655 [006] d... 2745434.122814: : 102
usleep-904 [006] d... 2745439.916264: : 103
usleep-1537 [003] d... 2745538.053737: : 104

Error case:
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[10...1000]=104/' usleep 99
event syntax error: '..annel.value[10...1000]=104/'
\___ Index too large
Hint: Valid config terms:
maps:[<arraymap>].value<indices>=[value]
maps:[<eventmap>].event<indices>=[event]

where <indices> is something like [0,3...5] or [all]
(add -v to see detail)
Run 'perf list' for a list of valid events

Usage: perf record [<options>] [<command>]
or: perf record [<options>] -- <command> [<options>]

-e, --event <event> event selector. use 'perf list' to list available events

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/parse-events.c | 5 ++-
tools/perf/util/parse-events.l | 13 ++++++-
tools/perf/util/parse-events.y | 85 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index af3d657..abdf551 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -660,9 +660,10 @@ parse_events_config_bpf(struct parse_events_evlist *data,
sizeof(errbuf));
data->error->help = strdup(
"Hint:\tValid config terms:\n"
-" \tmaps:[<arraymap>].value=[value]\n"
-" \tmaps:[<eventmap>].event=[event]\n"
+" \tmaps:[<arraymap>].value<indices>=[value]\n"
+" \tmaps:[<eventmap>].event<indices>=[event]\n"
"\n"
+" \twhere <indices> is something like [0,3...5] or [all]\n"
" \t(add -v to see detail)");
data->error->str = strdup(errbuf);
if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4387728..8bb3437 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -9,8 +9,8 @@
%{
#include <errno.h>
#include "../perf.h"
-#include "parse-events-bison.h"
#include "parse-events.h"
+#include "parse-events-bison.h"

char *parse_events_get_text(yyscan_t yyscanner);
YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@ do { \
%x mem
%s config
%x event
+%x array

group [^,{}/]*[{][^}]*[}][^,{}/]*
event_pmu [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -176,6 +177,14 @@ modifier_bp [rwx]{1,3}

}

+<array>{
+"]" { BEGIN(config); return ']'; }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+, { return ','; }
+"\.\.\." { return PE_ARRAY_RANGE; }
+}
+
<config>{
/*
* Please update parse_events_formats_error_string any time
@@ -196,6 +205,8 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
+\[all\] { return PE_ARRAY_ALL; }
+"[" { BEGIN(array); return '['; }
}

<mem>{
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index c3cbd7a..7e93b9f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list,
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
@@ -84,6 +85,9 @@ static inc_group_count(struct list_head *list,
%type <head> group_def
%type <head> group
%type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms

%union
{
@@ -95,6 +99,7 @@ static inc_group_count(struct list_head *list,
char *sys;
char *event;
} tracepoint_name;
+ struct parse_events_array array;
}
%%

@@ -601,6 +606,86 @@ PE_TERM
ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
$$ = term;
}
+|
+PE_NAME array '=' PE_NAME
+{
+ struct parse_events_term *term;
+ int i;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $4, &@1, &@4));
+
+ term->array = $2;
+ $$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $4, &@1, &@4));
+ term->array = $2;
+ $$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+ $$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+ $$.nr_ranges = 0;
+ $$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+ struct parse_events_array new_array;
+
+ new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+ new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+ new_array.nr_ranges);
+ ABORT_ON(!new_array.ranges);
+ memcpy(&new_array.ranges[0], $1.ranges,
+ $1.nr_ranges * sizeof(new_array.ranges[0]));
+ memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+ $3.nr_ranges * sizeof(new_array.ranges[0]));
+ free($1.ranges);
+ free($3.ranges);
+ $$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+ struct parse_events_array array;
+
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = 1;
+ $$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+ struct parse_events_array array;
+
+ ABORT_ON($3 < $1);
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = $3 - $1 + 1;
+ $$ = array;
+}

sep_dc: ':' |
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Arnaldo Carvalho de Melo
2015-12-11 12:11:57 UTC
Permalink
Post by Wang Nan
# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...
Is the above example valid? Wouldn't this be "maps:mymap.value" ?
Post by Wang Nan
By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.
# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
Can you explain the above a bit more? What are the magic 1 and 6 values?
Post by Wang Nan
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(unsigned char),
.max_entries = 100,
};
SEC("func=hrtimer_nanosleep rqtp->tv_nsec")
int func(void *ctx, int err, long nsec)
{
char fmt[] = "%ld\n";
long usec = nsec * 0x10624dd3 >> 38; // nsec / 1000
int key = (int)usec;
unsigned char *pval = map_lookup_elem(&channel, &key);
So that "mymap.value" name doesn't needs to be specified here?
Post by Wang Nan
if (!pval)
return 0;
bpf_trace_printk(fmt, sizeof(fmt), (unsigned char)*pval);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0,1,2,3...5]=101/' usleep 2
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0...9,20...29]=102,maps:channel.value[10...19]=103/' usleep 3
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[0...9,20...29]=102,maps:channel.value[10...19]=103/' usleep 15
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
usleep-655 [006] d... 2745434.122814: : 102
usleep-904 [006] d... 2745439.916264: : 103
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[all]=104/' usleep 99
# cat /sys/kernel/debug/tracing/trace | grep usleep
usleep-405 [004] d... 2745423.547822: : 101
usleep-655 [006] d... 2745434.122814: : 102
usleep-904 [006] d... 2745439.916264: : 103
usleep-1537 [003] d... 2745538.053737: : 104
# ./perf record -e './test_bpf_map_3.c/maps:channel.value[10...1000]=104/' usleep 99
event syntax error: '..annel.value[10...1000]=104/'
\___ Index too large
maps:[<arraymap>].value<indices>=[value]
maps:[<eventmap>].event<indices>=[event]
where <indices> is something like [0,3...5] or [all]
(add -v to see detail)
Run 'perf list' for a list of valid events
Usage: perf record [<options>] [<command>]
or: perf record [<options>] -- <command> [<options>]
-e, --event <event> event selector. use 'perf list' to list available events
---
tools/perf/util/parse-events.c | 5 ++-
tools/perf/util/parse-events.l | 13 ++++++-
tools/perf/util/parse-events.y | 85 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 100 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index af3d657..abdf551 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -660,9 +660,10 @@ parse_events_config_bpf(struct parse_events_evlist *data,
sizeof(errbuf));
data->error->help = strdup(
"Hint:\tValid config terms:\n"
-" \tmaps:[<arraymap>].value=[value]\n"
-" \tmaps:[<eventmap>].event=[event]\n"
+" \tmaps:[<arraymap>].value<indices>=[value]\n"
+" \tmaps:[<eventmap>].event<indices>=[event]\n"
"\n"
+" \twhere <indices> is something like [0,3...5] or [all]\n"
" \t(add -v to see detail)");
data->error->str = strdup(errbuf);
if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4387728..8bb3437 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -9,8 +9,8 @@
%{
#include <errno.h>
#include "../perf.h"
-#include "parse-events-bison.h"
#include "parse-events.h"
+#include "parse-events-bison.h"
char *parse_events_get_text(yyscan_t yyscanner);
YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@ do { \
%x mem
%s config
%x event
+%x array
group [^,{}/]*[{][^}]*[}][^,{}/]*
event_pmu [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -176,6 +177,14 @@ modifier_bp [rwx]{1,3}
}
+<array>{
+"]" { BEGIN(config); return ']'; }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+, { return ','; }
+"\.\.\." { return PE_ARRAY_RANGE; }
+}
+
<config>{
/*
* Please update parse_events_formats_error_string any time
@@ -196,6 +205,8 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
+\[all\] { return PE_ARRAY_ALL; }
+"[" { BEGIN(array); return '['; }
}
<mem>{
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index c3cbd7a..7e93b9f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list,
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
@@ -84,6 +85,9 @@ static inc_group_count(struct list_head *list,
%type <head> group_def
%type <head> group
%type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
%union
{
@@ -95,6 +99,7 @@ static inc_group_count(struct list_head *list,
char *sys;
char *event;
} tracepoint_name;
+ struct parse_events_array array;
}
%%
@@ -601,6 +606,86 @@ PE_TERM
$$ = term;
}
+|
+PE_NAME array '=' PE_NAME
+{
+ struct parse_events_term *term;
+ int i;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+
+ term->array = $2;
+ $$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ term->array = $2;
+ $$ = term;
+}
+
+'[' array_terms ']'
+{
+ $$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+ $$.nr_ranges = 0;
+ $$.ranges = NULL;
+}
+
+array_terms ',' array_term
+{
+ struct parse_events_array new_array;
+
+ new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+ new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+ new_array.nr_ranges);
+ ABORT_ON(!new_array.ranges);
+ memcpy(&new_array.ranges[0], $1.ranges,
+ $1.nr_ranges * sizeof(new_array.ranges[0]));
+ memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+ $3.nr_ranges * sizeof(new_array.ranges[0]));
+ free($1.ranges);
+ free($3.ranges);
+ $$ = new_array;
+}
+|
+array_term
+
+PE_VALUE
+{
+ struct parse_events_array array;
+
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = 1;
+ $$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+ struct parse_events_array array;
+
+ ABORT_ON($3 < $1);
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = $3 - $1 + 1;
+ $$ = array;
+}
sep_dc: ':' |
--
1.8.3.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Arnaldo Carvalho de Melo
2015-12-11 12:15:36 UTC
Permalink
Post by Arnaldo Carvalho de Melo
Post by Wang Nan
# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...
Is the above example valid? Wouldn't this be "maps:mymap.value" ?
Post by Wang Nan
By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.
# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
Can you explain the above a bit more? What are the magic 1 and 6 values?
So, from another patch:

static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;

Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
pi3orama
2015-12-11 12:40:41 UTC
Permalink
发自我的 iPhone
Post by Wang Nan
Post by Arnaldo Carvalho de Melo
Post by Wang Nan
# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...
Is the above example valid? Wouldn't this be "maps:mymap.value" ?
Post by Wang Nan
By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.
# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
Can you explain the above a bit more? What are the magic 1 and 6 values?
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
Post by Wang Nan
- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Arnaldo Carvalho de Melo
2015-12-11 12:47:14 UTC
Permalink
Post by pi3orama
发自我的 iPhone
Post by Wang Nan
Post by Arnaldo Carvalho de Melo
Post by Wang Nan
# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...
Is the above example valid? Wouldn't this be "maps:mymap.value" ?
Post by Wang Nan
By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.
# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
Can you explain the above a bit more? What are the magic 1 and 6 values?
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
Sure, just please don't assume whoever is reading your patches has this
background, provide comments above such places, so that reviewing gets
facilitated.

I eventually figured this is some sort of trampoline to access kernel
functions:

/* integer value in 'imm' field of BPF_CALL instruction selects which
* helper function eBPF program intends to call
*/
enum bpf_func_id {
BPF_FUNC_unspec,
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */


But if you had just:

/*
* See enum_bpf_func_id in ./include/uapi/linux/bpf.h
*/

That would've helped.

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
pi3orama
2015-12-11 12:58:14 UTC
Permalink
发自我的 iPhone
Post by Arnaldo Carvalho de Melo
Post by pi3orama
发自我的 iPhone
Post by Wang Nan
Post by Arnaldo Carvalho de Melo
Post by Wang Nan
# perf record -e bpf_file.c/maps.mymap.value[0,3...5,7]=1234/ ...
Is the above example valid? Wouldn't this be "maps:mymap.value" ?
Post by Wang Nan
By utilizing the basic facilities in bpf-loader.c which allow setting
different slots in a BPF map separately, the newly introduced syntax
allows perf to control specific elements in a BPF map.
# cat ./test_bpf_map_3.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
Can you explain the above a bit more? What are the magic 1 and 6 values?
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
Sure, just please don't assume whoever is reading your patches has this
background, provide comments above such places, so that reviewing gets
facilitated.
I eventually figured this is some sort of trampoline to access kernel
/* integer value in 'imm' field of BPF_CALL instruction selects which
* helper function eBPF program intends to call
*/
enum bpf_func_id {
BPF_FUNC_unspec,
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */
/*
* See enum_bpf_func_id in ./include/uapi/linux/bpf.h
*/
That would've helped.
Thank you, but I think this is a good chance to setup the policy about the header
files for BPF. I suggested to put BPF specific headers into Linux kernel include dir,
but we must find a way to avoid Linux includes them. Another useful structure is
pt_regs, however which is not as important as before because we have prologue
now.

I'd like to have a try next week.

Thank you.
Post by Arnaldo Carvalho de Melo
- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Alexei Starovoitov
2015-12-11 18:22:13 UTC
Permalink
Post by pi3orama
Post by Wang Nan
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
wait, what kind of errors?
they are in uapi, so gets installed into /usr/include eventually
and I haven't seen any erros either with gcc or clang.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wangnan (F)
2015-12-14 03:28:24 UTC
Permalink
Post by Alexei Starovoitov
Post by pi3orama
Post by Wang Nan
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
wait, what kind of errors?
they are in uapi, so gets installed into /usr/include eventually
and I haven't seen any erros either with gcc or clang.
Sorry. I saw error because I use

#include <linux/bpf.h>

It is okay if I use

#include <uapi/linux/bpf.h>

Thank you.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Alexei Starovoitov
2015-12-14 04:28:11 UTC
Permalink
Post by Wangnan (F)
Post by Alexei Starovoitov
Post by pi3orama
Post by Wang Nan
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
wait, what kind of errors?
they are in uapi, so gets installed into /usr/include eventually
and I haven't seen any erros either with gcc or clang.
Sorry. I saw error because I use
#include <linux/bpf.h>
It is okay if I use
#include <uapi/linux/bpf.h>
then let's use that instead of copy-paste. thanks

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wangnan (F)
2015-12-14 04:40:34 UTC
Permalink
Post by Alexei Starovoitov
Post by Wangnan (F)
Post by Alexei Starovoitov
Post by pi3orama
Post by Wang Nan
static u64 (*bpf_ktime_get_ns)(void) =
(void *)5;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_output)(void *, struct bpf_map_def *, int,
void *, unsigned long) =
(void *)23;
Where can I get this magical mistery table? Could this be hidden away in
some .h file automagically included in bpf scriptlets so that n00bies
like me don't have to be wtf'ing?
They are function numbers defined in bpf.h and bpf-common.h, but they are Linux
headers. Directly include them causes many error for llvm. Also, the function
prototypes are BPF specific and can't included in Linux source. We should have
a place holds those indices and prototypes together.
wait, what kind of errors?
they are in uapi, so gets installed into /usr/include eventually
and I haven't seen any erros either with gcc or clang.
Sorry. I saw error because I use
#include <linux/bpf.h>
It is okay if I use
#include <uapi/linux/bpf.h>
then let's use that instead of copy-paste. thanks
And what do you think about the BPF function prototype? Should we put them
into kernel headers? What about::

diff --git a/include/uapi/linux/bpf_functions.h
b/include/uapi/linux/bpf_functions.h
new file mode 100644
index 0000000..3a562d4
--- /dev/null
+++ b/include/uapi/linux/bpf_functions.h
@@ -0,0 +1,2 @@
+DEFINE_BPF_FUNC(void *, map_lookup_elem, void *, void *)
+DEFINE_BPF_FUNC(int, map_update_elem, void *, void *, void *, int)
[SNIP]
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9ea2d22..2f2f05f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -133,143 +133,23 @@ union bpf_attr {
};
} __attribute__((aligned(8)));

+#define DEFINE_BPF_FUNC(rettype, name, arglist...) BPF_FUNC_##name
+
+enum bpf_func_id {
+BPF_FUNC_unspec,
+#include "bpf_functions.h"
+__BPF_FUNC_MAX_ID,
+};
+
+#ifdef __BPF_SOURCE__
+#undef DEFINE_BPF_FUNC
+#define DEFINE_BPF_FUNC(rettype, name, arglist...) static rettype
(*name)(arglist) = (void *)BPF_FUNC_##name
+#include "bpf_functions.h"
+#endif
/* integer value in 'imm' field of BPF_CALL instruction selects which
helper
* function eBPF program intends to call
*/
enum bpf_func_id {
- BPF_FUNC_unspec,
[SNIP]

And when compiling BPF source file we add a __BPF_SOURCE__ directive?

Thank you.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Alexei Starovoitov
2015-12-14 05:51:38 UTC
Permalink
Post by Wangnan (F)
And what do you think about the BPF function prototype? Should we put them
+#define DEFINE_BPF_FUNC(rettype, name, arglist...) static rettype
(*name)(arglist) = (void *)BPF_FUNC_##name
tldr: let's keep it as a part of user headers until better
solution found.

frankly
static void *(*bpf_map_lookup_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_lookup_elem;
was llvm hack that I thought will be fixed quickly.
That was the easiest way to make C/llvm/bpf_loader to agree on
passing 'bpf_call #num' insn into the kernel.
It works, but it works only with -O2 and higher.
At lower optimization levels llvm generates load of constant
into register and indirect call by register, so that's not suitable
as clean api. bcc with clang::rewriter can solve it, but we don't
want to always depend on that, so currently it's a status quo.
Don't mess with what ain't broken.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:29:54 UTC
Permalink
A new syntax is appended into parser so user can pass predefined perf
events into BPF objects.

After this patch, BPF programs for perf are finally able to utilize
bpf_perf_event_read() introduced in commit 35578d7984003097af2b1e3
(bpf: Implement function bpf_perf_event_read() that get the selected
hardware PMU conuter).

Test result:

# cat ./test_bpf_map_2.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_read)(struct bpf_map_def *, int) =
(void *)22;

struct bpf_map_def SEC("maps") pmu_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = __NR_CPUS__,
};
SEC("func_write=sys_write")
int func_write(void *ctx)
{
unsigned long long val;
char fmt[] = "sys_write: pmu=%llu\n";
val = bpf_perf_event_read(&pmu_map, bpf_get_smp_processor_id());
bpf_trace_printk(fmt, sizeof(fmt), val);
return 0;
}

SEC("func_write_return=sys_write%return")
int func_write_return(void *ctx)
{
unsigned long long val = 0;
char fmt[] = "sys_write_return: pmu=%llu\n";
val = bpf_perf_event_read(&pmu_map, bpf_get_smp_processor_id());
bpf_trace_printk(fmt, sizeof(fmt), val);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/

Normal case 1:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-13865 [006] d... 2722740.933204: : sys_write: pmu=1121685
ls-13865 [006] dN.. 2722740.933242: : sys_write_return: pmu=1178149
ls-13865 [006] d... 2722740.933248: : sys_write: pmu=1194986
ls-13865 [006] dN.. 2722740.933270: : sys_write_return: pmu=1220862

Normal case 2:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/period=0x7fffffffffffffff,no-inherit/ \
-e './test_bpf_map_2.c/maps:pmu_map.event=evt/' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data ]
# ./perf report --stdio
Error:
The perf.data file has no samples!

(This is expected because we set period of cycles to a very large
value to period of cycles event because we want to use this event
as a counter only, don't need sampling)

# cat /sys/kernel/debug/tracing/trace | grep ls
ls-14446 [006] d... 2722976.486458: : sys_write: pmu=1116233
ls-14446 [006] dN.. 2722976.486486: : sys_write_return: pmu=1162108
ls-14446 [006] d... 2722976.486491: : sys_write: pmu=1177122
ls-14446 [006] dN.. 2722976.486511: : sys_write_return: pmu=1202417

Normal case 3:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -i -e cycles -e './test_bpf_map_2.c/maps:pmu_map.event=cycles/' ls /

(When doesn't explicitly set alias, event name can be used to search events)

[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-16480 [005] d... 2724143.955040: : sys_write: pmu=1150794
ls-16480 [005] dN.. 2724143.955077: : sys_write_return: pmu=1207161
ls-16480 [005] d... 2724143.955083: : sys_write: pmu=1219145
ls-16480 [005] dN.. 2724143.955104: : sys_write_return: pmu=1245433

Normal case 4 (one thread case):
# ls /proc/11808/task/
11808
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' -p 11808
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.019 MB perf.data (2 samples) ]

# cat /sys/kernel/debug/tracing/trace | grep 11808
sshd-11808 [000] d... 2740454.781150: : sys_write: pmu=18446744073709551594
sshd-11808 [000] d... 2740454.781168: : sys_write_return: pmu=18446744073709551594
sshd-11808 [003] d... 2740467.411799: : sys_write: pmu=131031
sshd-11808 [003] dN.. 2740467.411806: : sys_write_return: pmu=161549
sshd-11808 [003] d... 2740467.411834: : sys_write: pmu=210269

Normal case 5 (system wide):
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ]

# cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20
[SNIP]
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373
gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696
gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658
gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572

Error case 1:

# ./perf record -e './test_bpf_map_2.c' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.014 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614
ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614

(18446744073709551614 is 0xfffffffffffffffe (-2))

Error case 2:
# ./perf record -e cycles -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' -a
event syntax error: '..ps:pmu_map.event=evt/'
\___ Event not found for map setting

Hint: Valid config terms:
maps:[<arraymap>].value=[value]
maps:[<eventmap>].event=[event]
[SNIP]

Error case 3:
# ls /proc/2342/task/
2342 2373 2374 2375 2376
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' -p 2342
ERROR: Apply config to BPF failed: Cannot set event to BPF maps in multi-thread tracing

Error case 4:
# ./perf record -e cycles -e './test_bpf_map_2.c/maps:pmu_map.event=cycles/' ls /
ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use /no-inherit/ config term or use -i)

Error case 5:
# ./perf record -e evt=raw_syscalls:sys_enter/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map.event=evt/' ls
ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map

Signed-off-by: Wang Nan <***@huawei.com>
Signed-off-by: He Kuang <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/bpf-loader.c | 138 ++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/bpf-loader.h | 5 ++
tools/perf/util/parse-events.c | 4 +-
3 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 96fd18b..97f5efc 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -742,6 +742,7 @@ int bpf__foreach_tev(struct bpf_object *obj,

enum bpf_map_op_type {
BPF_MAP_OP_SET_VALUE,
+ BPF_MAP_OP_SET_EVSEL,
};

enum bpf_map_key_type {
@@ -754,6 +755,7 @@ struct bpf_map_op {
enum bpf_map_key_type key_type;
union {
u64 value;
+ struct perf_evsel *evsel;
} v;
};

@@ -891,10 +893,73 @@ bpf__obj_config_map_value(struct bpf_map *map,
if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
return bpf__obj_config_map_array_value(map, term);

- pr_debug("ERROR: wrong value type\n");
+ pr_debug("ERROR: wrong value type for 'value'\n");
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
}

+static int
+bpf__obj_config_map_array_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ struct bpf_map_def def;
+ struct bpf_map_op *op;
+ const char *map_name;
+ int err;
+
+ map_name = bpf_map__get_name(map);
+ evsel = perf_evlist__find_evsel_by_alias(evlist, term->val.str);
+ if (!evsel) {
+ pr_debug("Event (for '%s') '%s' doesn't exist\n",
+ map_name, term->val.str);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+ }
+
+ err = bpf_map__get_def(map, &def);
+ if (err) {
+ pr_debug("Unable to get map definition from '%s'\n",
+ map_name);
+ return err;
+ }
+
+ /*
+ * No need to check key_size and value_size:
+ * kernel has already checked them.
+ */
+ if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+
+ op = bpf_map_op__alloc(map);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ op->v.evsel = evsel;
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ return 0;
+}
+
+static int
+bpf__obj_config_map_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ if (!term->err_val) {
+ pr_debug("Config value not set\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+ }
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ return bpf__obj_config_map_array_event(map, term, evlist);
+
+ pr_debug("ERROR: wrong value type for 'event'\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+}
+
+
struct bpf_obj_config_map_func {
const char *config_opt;
int (*config_func)(struct bpf_map *, struct parse_events_term *,
@@ -903,6 +968,7 @@ struct bpf_obj_config_map_func {

struct bpf_obj_config_map_func bpf_obj_config_map_funcs[] = {
{"value", bpf__obj_config_map_value},
+ {"event", bpf__obj_config_map_event},
};

static int
@@ -1047,6 +1113,7 @@ bpf_map_config_foreach_key(struct bpf_map *map,
list_for_each_entry(op, &priv->ops_list, list) {
switch (def.type) {
case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
return foreach_key_array_all(func, arg, name,
@@ -1101,6 +1168,60 @@ apply_config_value_for_key(int map_fd, void *pkey,
}

static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+ struct perf_evsel *evsel)
+{
+ struct xyarray *xy = evsel->fd;
+ struct perf_event_attr *attr;
+ unsigned int key, events;
+ bool check_pass = false;
+ int *evt_fd;
+ int err;
+
+ if (!xy) {
+ pr_debug("ERROR: evsel not ready for map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (xy->row_size / xy->entry_size != 1) {
+ pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+ name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+ }
+
+ attr = &evsel->attr;
+ if (attr->inherit) {
+ pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+ }
+
+ if (attr->type == PERF_TYPE_RAW)
+ check_pass = true;
+ if (attr->type == PERF_TYPE_HARDWARE)
+ check_pass = true;
+ if (attr->type == PERF_TYPE_SOFTWARE &&
+ attr->config == PERF_COUNT_SW_BPF_OUTPUT)
+ check_pass = true;
+ if (!check_pass) {
+ pr_debug("ERROR: Event type is wrong for map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+ }
+
+ events = xy->entries / (xy->row_size / xy->entry_size);
+ key = *((unsigned int *)pkey);
+ if (key >= events) {
+ pr_debug("ERROR: there is no event %d for map %s\n",
+ key, name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+ }
+ evt_fd = xyarray__entry(xy, key, 0);
+ err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+ if (err && errno)
+ err = -errno;
+ return err;
+}
+
+static int
apply_obj_config_map_for_key(const char *name, int map_fd,
struct bpf_map_def *pdef __maybe_unused,
struct bpf_map_op *op,
@@ -1114,6 +1235,10 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
pdef->value_size,
op->v.value);
break;
+ case BPF_MAP_OP_SET_EVSEL:
+ err = apply_config_evsel_for_key(name, map_fd, pkey,
+ op->v.evsel);
+ break;
default:
pr_debug("ERROR: unknown value type for '%s'\n", name);
err = -BPF_LOADER_ERRNO__INTERNAL;
@@ -1179,6 +1304,11 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
+ [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
};

static int
@@ -1315,6 +1445,12 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+ "Cannot set event to BPF maps in multi-thread tracing");
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+ "%s (Hint: use /no-inherit/ config term or use -i)", emsg);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+ "Can only put raw, hardware and BPF output event into a BPF map");
bpf__strerror_end(buf, size);
return 0;
}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index db3c34c..c9ce792 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -33,6 +33,11 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */
BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */
BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */
__BPF_LOADER_ERRNO__END,
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 484c8e4..5d682dd 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -659,8 +659,10 @@ parse_events_config_bpf(struct parse_events_evlist *data,
&error_pos, err, errbuf,
sizeof(errbuf));
data->error->help = strdup(
-"Hint:\tValid config term:\n"
+"Hint:\tValid config terms:\n"
" \tmaps:[<arraymap>].value=[value]\n"
+" \tmaps:[<eventmap>].event=[event]\n"
+"\n"
" \t(add -v to see detail)");
data->error->str = strdup(errbuf);
if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:30:17 UTC
Permalink
From: He Kuang <***@huawei.com>

This patch adds new bison rules for specifying an alias name to a perf
event, which allows cmdline refer to previous defined perf event through
its name. With this patch user can give alias name to a perf event using
following cmdline:

# perf record -e mypmu=cycles ...

If alias is not provided (normal case):

# perf record -e cycles ...

It will be set to event's name automatically ('cycles' in the above
example).

To allow parser refer to existing event selector, pass event list to
'struct parse_events_evlist'. perf_evlist__find_evsel_by_alias() is
introduced to get evsel through its alias.

Test result:

Before this patch:

# ./perf record -e evt=cycles usleep 10
event syntax error: 'evt=cycles'
\___ parser error
Run 'perf list' for a list of valid events
[SNIP]

After this patch:

# ./perf record -e evt=cycles usleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ]

Signed-off-by: He Kuang <***@huawei.com>
Signed-off-by: Wang Nan <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/evlist.c | 16 ++++++++++++++++
tools/perf/util/evlist.h | 4 ++++
tools/perf/util/evsel.c | 1 +
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-events.c | 37 ++++++++++++++++++++++++++++++++-----
tools/perf/util/parse-events.h | 5 +++++
tools/perf/util/parse-events.y | 15 ++++++++++++++-
7 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1b6c20..a822823 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1737,3 +1737,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,

tracking_evsel->tracking = true;
}
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist,
+ const char *alias)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (!evsel->alias)
+ continue;
+ if (strcmp(alias, evsel->alias) == 0)
+ return evsel;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe7..4e25342 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -292,4 +292,8 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist, const char *alias);
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 47f0330..8e0e6f4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1073,6 +1073,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
thread_map__put(evsel->threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
+ zfree(&evsel->alias);
perf_evsel__object.fini(evsel);
}

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5ded1fc..5f6dd57 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -89,6 +89,7 @@ struct perf_evsel {
int idx;
u32 ids;
char *name;
+ char *alias;
double scale;
const char *unit;
struct event_format *tp_format;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 95775fe..484c8e4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -653,9 +653,9 @@ parse_events_config_bpf(struct parse_events_evlist *data,
return -EINVAL;
}

- err = bpf__config_obj(obj, term, NULL, &error_pos);
+ err = bpf__config_obj(obj, term, data->evlist, &error_pos);
if (err) {
- bpf__strerror_config_obj(obj, term, NULL,
+ bpf__strerror_config_obj(obj, term, data->evlist,
&error_pos, err, errbuf,
sizeof(errbuf));
data->error->help = strdup(
@@ -1089,6 +1089,30 @@ int parse_events__modifier_group(struct list_head *list,
return parse_events__modifier_event(list, event_mod, true);
}

+int parse_events__set_event_alias(struct parse_events_evlist *data,
+ struct list_head *list,
+ const char *str,
+ void *loc_alias_)
+{
+ struct perf_evsel *evsel;
+ YYLTYPE *loc_alias = loc_alias_;
+
+ if (!str)
+ return 0;
+
+ if (!list_is_singular(list)) {
+ struct parse_events_error *err = data->error;
+
+ err->idx = loc_alias->first_column;
+ err->str = strdup("One alias can be applied to one event only");
+ return -EINVAL;
+ }
+
+ evsel = list_first_entry(list, struct perf_evsel, node);
+ evsel->alias = strdup(str);
+ return evsel->alias ? 0 : -ENOMEM;
+}
+
void parse_events__set_leader(char *name, struct list_head *list)
{
struct perf_evsel *leader;
@@ -1281,6 +1305,8 @@ int parse_events_name(struct list_head *list, char *name)
__evlist__for_each(list, evsel) {
if (!evsel->name)
evsel->name = strdup(name);
+ if (!evsel->alias)
+ evsel->alias = strdup(name);
}

return 0;
@@ -1442,9 +1468,10 @@ int parse_events(struct perf_evlist *evlist, const char *str,
struct parse_events_error *err)
{
struct parse_events_evlist data = {
- .list = LIST_HEAD_INIT(data.list),
- .idx = evlist->nr_entries,
- .error = err,
+ .list = LIST_HEAD_INIT(data.list),
+ .idx = evlist->nr_entries,
+ .error = err,
+ .evlist = evlist,
};
int ret;

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 84694f3..20ad3c2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -98,6 +98,7 @@ struct parse_events_evlist {
int idx;
int nr_groups;
struct parse_events_error *error;
+ struct perf_evlist *evlist;
};

struct parse_events_terms {
@@ -171,4 +172,8 @@ extern int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);

+int parse_events__set_event_alias(struct parse_events_evlist *data,
+ struct list_head *list,
+ const char *str,
+ void *loc_alias_);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 8992d16..c3cbd7a 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -77,6 +77,7 @@ static inc_group_count(struct list_head *list,
%type <head> event_bpf_file
%type <head> event_def
%type <head> event_mod
+%type <head> event_alias
%type <head> event_name
%type <head> event
%type <head> events
@@ -193,13 +194,25 @@ event_name PE_MODIFIER_EVENT
event_name

event_name:
-PE_EVENT_NAME event_def
+PE_EVENT_NAME event_alias
{
ABORT_ON(parse_events_name($2, $1));
free($1);
$$ = $2;
}
|
+event_alias
+
+event_alias:
+PE_NAME '=' event_def
+{
+ struct list_head *list = $3;
+ struct parse_events_evlist *data = _data;
+
+ ABORT_ON(parse_events__set_event_alias(data, list, $1, &@1));
+ $$ = list;
+}
+|
event_def

event_def: event_pmu |
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Arnaldo Carvalho de Melo
2015-12-11 12:03:46 UTC
Permalink
Post by Wang Nan
This patch adds new bison rules for specifying an alias name to a perf
event, which allows cmdline refer to previous defined perf event through
its name. With this patch user can give alias name to a perf event using
Please add Jiri Olsa to any changes that touches the parser changes.

Can you reword the above phrase? Does it mean something like:

----
This patches adds new bison rules for specifying an alias to a perf
event, which allows referring to this previously defined perf event
through this alias."
----

But then, why would I want this aliasing? The provided examples shows a
way to obfuscate 'cycles', a perfectly good name, why would one want to
call it "mypmu"?
Post by Wang Nan
# perf record -e mypmu=cycles ...
# perf record -e cycles ...
It will be set to event's name automatically ('cycles' in the above
example).
Sure, but when would I use 'mypmu'?
Post by Wang Nan
To allow parser refer to existing event selector, pass event list to
'struct parse_events_evlist'. perf_evlist__find_evsel_by_alias() is
introduced to get evsel through its alias.
# ./perf record -e evt=cycles usleep 10
event syntax error: 'evt=cycles'
\___ parser error
Run 'perf list' for a list of valid events
[SNIP]
Sure, before this patch aliases are not supported, so it will fail.
Post by Wang Nan
# ./perf record -e evt=cycles usleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ]
So? What are the effects on the output of 'perf evlist'? I guess it will
appear just as 'cycles'?

Can you provide at least an example where we _use_ this alias and by
doing that we gain something?

- Arnaldo
Post by Wang Nan
---
tools/perf/util/evlist.c | 16 ++++++++++++++++
tools/perf/util/evlist.h | 4 ++++
tools/perf/util/evsel.c | 1 +
tools/perf/util/evsel.h | 1 +
tools/perf/util/parse-events.c | 37 ++++++++++++++++++++++++++++++++-----
tools/perf/util/parse-events.h | 5 +++++
tools/perf/util/parse-events.y | 15 ++++++++++++++-
7 files changed, 73 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1b6c20..a822823 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1737,3 +1737,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
tracking_evsel->tracking = true;
}
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist,
+ const char *alias)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (!evsel->alias)
+ continue;
+ if (strcmp(alias, evsel->alias) == 0)
+ return evsel;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe7..4e25342 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -292,4 +292,8 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_alias(struct perf_evlist *evlist, const char *alias);
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 47f0330..8e0e6f4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1073,6 +1073,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
thread_map__put(evsel->threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
+ zfree(&evsel->alias);
perf_evsel__object.fini(evsel);
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5ded1fc..5f6dd57 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -89,6 +89,7 @@ struct perf_evsel {
int idx;
u32 ids;
char *name;
+ char *alias;
double scale;
const char *unit;
struct event_format *tp_format;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 95775fe..484c8e4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -653,9 +653,9 @@ parse_events_config_bpf(struct parse_events_evlist *data,
return -EINVAL;
}
- err = bpf__config_obj(obj, term, NULL, &error_pos);
+ err = bpf__config_obj(obj, term, data->evlist, &error_pos);
if (err) {
- bpf__strerror_config_obj(obj, term, NULL,
+ bpf__strerror_config_obj(obj, term, data->evlist,
&error_pos, err, errbuf,
sizeof(errbuf));
data->error->help = strdup(
@@ -1089,6 +1089,30 @@ int parse_events__modifier_group(struct list_head *list,
return parse_events__modifier_event(list, event_mod, true);
}
+int parse_events__set_event_alias(struct parse_events_evlist *data,
+ struct list_head *list,
+ const char *str,
+ void *loc_alias_)
+{
+ struct perf_evsel *evsel;
+ YYLTYPE *loc_alias = loc_alias_;
+
+ if (!str)
+ return 0;
+
+ if (!list_is_singular(list)) {
+ struct parse_events_error *err = data->error;
+
+ err->idx = loc_alias->first_column;
+ err->str = strdup("One alias can be applied to one event only");
+ return -EINVAL;
+ }
+
+ evsel = list_first_entry(list, struct perf_evsel, node);
+ evsel->alias = strdup(str);
+ return evsel->alias ? 0 : -ENOMEM;
+}
+
void parse_events__set_leader(char *name, struct list_head *list)
{
struct perf_evsel *leader;
@@ -1281,6 +1305,8 @@ int parse_events_name(struct list_head *list, char *name)
__evlist__for_each(list, evsel) {
if (!evsel->name)
evsel->name = strdup(name);
+ if (!evsel->alias)
+ evsel->alias = strdup(name);
}
return 0;
@@ -1442,9 +1468,10 @@ int parse_events(struct perf_evlist *evlist, const char *str,
struct parse_events_error *err)
{
struct parse_events_evlist data = {
- .list = LIST_HEAD_INIT(data.list),
- .idx = evlist->nr_entries,
- .error = err,
+ .list = LIST_HEAD_INIT(data.list),
+ .idx = evlist->nr_entries,
+ .error = err,
+ .evlist = evlist,
};
int ret;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 84694f3..20ad3c2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -98,6 +98,7 @@ struct parse_events_evlist {
int idx;
int nr_groups;
struct parse_events_error *error;
+ struct perf_evlist *evlist;
};
struct parse_events_terms {
@@ -171,4 +172,8 @@ extern int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);
+int parse_events__set_event_alias(struct parse_events_evlist *data,
+ struct list_head *list,
+ const char *str,
+ void *loc_alias_);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 8992d16..c3cbd7a 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -77,6 +77,7 @@ static inc_group_count(struct list_head *list,
%type <head> event_bpf_file
%type <head> event_def
%type <head> event_mod
+%type <head> event_alias
%type <head> event_name
%type <head> event
%type <head> events
@@ -193,13 +194,25 @@ event_name PE_MODIFIER_EVENT
event_name
-PE_EVENT_NAME event_def
+PE_EVENT_NAME event_alias
{
ABORT_ON(parse_events_name($2, $1));
free($1);
$$ = $2;
}
|
+event_alias
+
+PE_NAME '=' event_def
+{
+ struct list_head *list = $3;
+ struct parse_events_evlist *data = _data;
+
+ $$ = list;
+}
+|
event_def
event_def: event_pmu |
--
1.8.3.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
pi3orama
2015-12-11 12:13:17 UTC
Permalink
发自我的 iPhone
Post by Arnaldo Carvalho de Melo
Post by Wang Nan
This patch adds new bison rules for specifying an alias name to a perf
event, which allows cmdline refer to previous defined perf event through
its name. With this patch user can give alias name to a perf event using
Please add Jiri Olsa to any changes that touches the parser changes.
----
This patches adds new bison rules for specifying an alias to a perf
event, which allows referring to this previously defined perf event
through this alias."
----
But then, why would I want this aliasing? The provided examples shows a
way to obfuscate 'cycles', a perfectly good name, why would one want to
call it "mypmu"?
Sorry. I should put this patch after 7/16 so you won't get confused. Without event
aliasing it is hard to select an event and insert it into a BPF map.

Patch 7/16 would use alias like:

# perf record -e evt=cycles/no-inherit/ -e BPF.c/maps.pmu.event=evt/ ...
So it put cycles/no-inherit/ into that map.

Thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:30:39 UTC
Permalink
This patch adds the final step for BPF map configuration. A new syntax
is appended into parser so user can config BPF objects through '/' '/'
enclosed config terms.

After this patch, following syntax is available:

# perf record -e bpf_file.c/maps:mymap:value=123/ ...

It would takes effect after appling following commits.

Test result:

# cat ./test_bpf_map_1.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
};
SEC("func=sys_nanosleep")
int func(void *ctx)
{
int key = 0;
char fmt[] = "%d\n";
int *pval = map_lookup_elem(&channel, &key);
if (!pval)
return 0;
bpf_trace_printk(fmt, sizeof(fmt), *pval);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/

- Normal case:
# ./perf record -e './test_bpf_map_1.c/maps:channel.value=10/' usleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]

- Error case:

# ./perf record -e './test_bpf_map_1.c/maps:channel.value/' usleep 10
event syntax error: '..ps:channel:value/'
\___ Config value not set (lost '=')
Hint: Valid config term:
maps:[<arraymap>]:value=[value]
(add -v to see detail)
Run 'perf list' for a list of valid events

Usage: perf record [<options>] [<command>]
or: perf record [<options>] -- <command> [<options>]

-e, --event <event> event selector. use 'perf list' to list available events

# ./perf record -e './test_bpf_map_1.c/xmaps:channel.value=10/' usleep 10
event syntax error: '..pf_map_1.c/xmaps:channel.value=10/'
\___ Invalid object config option
[SNIP]

# ./perf record -e './test_bpf_map_1.c/maps:xchannel.value=10/' usleep 10
event syntax error: '..p_1.c/maps:xchannel.value=10/'
\___ Target map not exist
[SNIP]

# ./perf record -e './test_bpf_map_1.c/maps:channel.xvalue=10/' usleep 10
event syntax error: '..ps:channel.xvalue=10/'
\___ Invalid object maps config option
[SNIP]

# ./perf record -e './test_bpf_map_1.c/maps:channel.value=x10/' usleep 10
event syntax error: '..nnel.value=x10/'
\___ Incorrect value type for map
[SNIP]

Change BPF_MAP_TYPE_ARRAY = 2 tp BPF_MAP_TYPE_ARRAY = 1:

# ./perf record -e './test_bpf_map_1.c/maps:channel.value=10/' usleep 10
event syntax error: '..ps:channel.value=10/'
\___ Can't use this config term to this type of map

Hint: Valid config term:
maps:[<arraymap>].value=[value]
(add -v to see detail)

Signed-off-by: Wang Nan <***@huawei.com>
Signed-off-by: He Kuang <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/util/parse-events.c | 56 +++++++++++++++++++++++++++++++++++++++---
tools/perf/util/parse-events.h | 3 ++-
tools/perf/util/parse-events.l | 2 +-
tools/perf/util/parse-events.y | 23 ++++++++++++++---
4 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6fc8cd7..95775fe 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -628,17 +628,64 @@ errout:
return err;
}

+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+ struct bpf_object *obj,
+ struct list_head *head_config)
+{
+ struct parse_events_term *term;
+ int error_pos;
+
+ if (!head_config || list_empty(head_config))
+ return 0;
+
+ list_for_each_entry(term, head_config, list) {
+ char errbuf[BUFSIZ];
+ int err;
+
+ if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+ snprintf(errbuf, sizeof(errbuf),
+ "Invalid config term for BPF object");
+ errbuf[BUFSIZ - 1] = '\0';
+
+ data->error->idx = term->err_term;
+ data->error->str = strdup(errbuf);
+ return -EINVAL;
+ }
+
+ err = bpf__config_obj(obj, term, NULL, &error_pos);
+ if (err) {
+ bpf__strerror_config_obj(obj, term, NULL,
+ &error_pos, err, errbuf,
+ sizeof(errbuf));
+ data->error->help = strdup(
+"Hint:\tValid config term:\n"
+" \tmaps:[<arraymap>].value=[value]\n"
+" \t(add -v to see detail)");
+ data->error->str = strdup(errbuf);
+ if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+ data->error->idx = term->err_val;
+ else
+ data->error->idx = term->err_term + error_pos;
+ return err;
+ }
+ }
+ return 0;
+
+}
+
int parse_events_load_bpf(struct parse_events_evlist *data,
struct list_head *list,
char *bpf_file_name,
- bool source)
+ bool source,
+ struct list_head *head_config)
{
struct bpf_object *obj;
+ int err;

obj = bpf__prepare_load(bpf_file_name, source);
if (IS_ERR(obj)) {
char errbuf[BUFSIZ];
- int err;

err = PTR_ERR(obj);

@@ -656,7 +703,10 @@ int parse_events_load_bpf(struct parse_events_evlist *data,
return err;
}

- return parse_events_load_bpf_obj(data, list, obj);
+ err = parse_events_load_bpf_obj(data, list, obj);
+ if (err)
+ return err;
+ return parse_events_config_bpf(data, obj, head_config);
}

static int
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index f1a6db1..84694f3 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -126,7 +126,8 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
int parse_events_load_bpf(struct parse_events_evlist *data,
struct list_head *list,
char *bpf_file_name,
- bool source);
+ bool source,
+ struct list_head *head_config);
/* Provide this function for perf test */
struct bpf_object;
int parse_events_load_bpf_obj(struct parse_events_evlist *data,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 58c5831..4387728 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -122,7 +122,7 @@ num_dec [0-9]+
num_hex 0x[a-fA-F0-9]+
num_raw_hex [a-fA-F0-9]+
name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
-name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
+name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
/* If you add a modifier you need to update check_modifier() */
modifier_event [ukhpPGHSDI]+
modifier_bp [rwx]{1,3}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index ad37996..8992d16 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -64,6 +64,7 @@ static inc_group_count(struct list_head *list,
%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
%type <num> value_sym
%type <head> event_config
+%type <head> event_bpf_config
%type <term> event_term
%type <head> event_pmu
%type <head> event_legacy_symbol
@@ -455,27 +456,41 @@ PE_RAW
}

event_bpf_file:
-PE_BPF_OBJECT
+PE_BPF_OBJECT event_bpf_config
{
struct parse_events_evlist *data = _data;
struct parse_events_error *error = data->error;
struct list_head *list;

ALLOC_LIST(list);
- ABORT_ON(parse_events_load_bpf(data, list, $1, false));
+ ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2));
+ if ($2)
+ parse_events__free_terms($2);
$$ = list;
}
|
-PE_BPF_SOURCE
+PE_BPF_SOURCE event_bpf_config
{
struct parse_events_evlist *data = _data;
struct list_head *list;

ALLOC_LIST(list);
- ABORT_ON(parse_events_load_bpf(data, list, $1, true));
+ ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2));
+ if ($2)
+ parse_events__free_terms($2);
$$ = list;
}

+event_bpf_config:
+'/' event_config '/'
+{
+ $$ = $2;
+}
+|
+{
+ $$ = NULL;
+}
+
start_terms: event_config
{
struct parse_events_terms *data = _data;
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:30:54 UTC
Permalink
Useful for getting stack traces for hardware breakpoint events.

Test result:

Before this patch:
# ~/perf record -g -e mem:0x600980 ./sample
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data (12 samples) ]

# ~/perf script

# ~/perf script -F comm,tid,pid,time,event,ip,sym,dso
sample 22520/22520 97457.836294: mem:0x600980:
5a4ad8 __clear_user (/lib/modules/4.3.0-rc4+/build/vmlinux)
...
3f41ba sys_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
979395 return_from_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
7f1b59719cf7 [unknown] ([unknown])

sample 22520/22520 97457.836648: mem:0x600980:
532 main (/home/w00229757/DataBreakpoints/sample)
21bd5 __libc_start_main (/tmp/oxygen_root-root/lib64/libc-2.18.so)
...

After this patch:
# ~/perf script
sample 22520 97457.836294: mem:0x600980:
5a4ad8 __clear_user (/lib/modules/4.3.0-rc4+/build/vmlinux)
...
3f41ba sys_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
979395 return_from_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
7f1b59719cf7 [unknown] ([unknown])

sample 22520 97457.836648: mem:0x600980:
532 main (/home/w00229757/DataBreakpoints/sample)
21bd5 __libc_start_main (/tmp/oxygen_root-root/lib64/libc-2.18.so)

Signed-off-by: Wang Nan <***@huawei.com>
Cc: Adrian Hunter <***@intel.com>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Jiri Olsa <***@redhat.com>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/builtin-script.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3c3f8d0..d259e9a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -130,6 +130,18 @@ static struct {

.invalid_fields = PERF_OUTPUT_TRACE,
},
+
+ [PERF_TYPE_BREAKPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+ PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE,
+ },
};

static bool output_set_by_user(void)
@@ -1129,6 +1141,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
type = PERF_TYPE_TRACEPOINT;
else if (!strcmp(str, "raw"))
type = PERF_TYPE_RAW;
+ else if (!strcmp(str, "break"))
+ type = PERF_TYPE_BREAKPOINT;
else {
fprintf(stderr, "Invalid event type in field string.\n");
rc = -EINVAL;
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
tip-bot for Wang Nan
2015-12-14 08:39:23 UTC
Permalink
Commit-ID: 27cfef009ae8a1019d174153987ce22a0e6677fc
Gitweb: http://git.kernel.org/tip/27cfef009ae8a1019d174153987ce22a0e6677fc
Author: Wang Nan <***@huawei.com>
AuthorDate: Tue, 8 Dec 2015 02:25:43 +0000
Committer: Arnaldo Carvalho de Melo <***@redhat.com>
CommitDate: Fri, 11 Dec 2015 09:25:16 -0300

perf script: Add support for PERF_TYPE_BREAKPOINT

Useful for getting stack traces for hardware breakpoint events.

Test result:

Before this patch:
# ~/perf record -g -e mem:0x600980 ./sample
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data (12 samples) ]

# ~/perf script

# ~/perf script -F comm,tid,pid,time,event,ip,sym,dso
sample 22520/22520 97457.836294: mem:0x600980:
5a4ad8 __clear_user (/lib/modules/4.3.0-rc4+/build/vmlinux)
...
3f41ba sys_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
979395 return_from_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
7f1b59719cf7 [unknown] ([unknown])

sample 22520/22520 97457.836648: mem:0x600980:
532 main (/home/w00229757/DataBreakpoints/sample)
21bd5 __libc_start_main (/tmp/oxygen_root-root/lib64/libc-2.18.so)
...

After this patch:
# ~/perf script
sample 22520 97457.836294: mem:0x600980:
5a4ad8 __clear_user (/lib/modules/4.3.0-rc4+/build/vmlinux)
...
3f41ba sys_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
979395 return_from_execve (/lib/modules/4.3.0-rc4+/build/vmlinux)
7f1b59719cf7 [unknown] ([unknown])

sample 22520 97457.836648: mem:0x600980:
532 main (/home/w00229757/DataBreakpoints/sample)
21bd5 __libc_start_main (/tmp/oxygen_root-root/lib64/libc-2.18.so)

Committer note:

So, further testing, lets do it for a kernel global variable,
tcp_hashinfo:

# grep -w tcp_hashinfo /proc/kallsyms
ffffffff8202fc00 B tcp_hashinfo
#

Note: allow specifying mem:tcp_hashinfo:

# perf record -g -e mem:0xffffffff81c65ac0 -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.790 MB perf.data ]
#
# perf evlist
mem:0xffffffff8202fc00
# perf evlist -v
mem:0xffffffff8202fc00: type: 5, size: 112, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU, disabled: 1, inherit: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, bp_type: 3, { bp_addr, config1 }: 0xffffffff8202fc00, { bp_len, config2 }: 0x4
#

Then, after this patch:

# perf script
swapper 0 [000] 171036.986988: mem:0xffffffff8202fc00:
8a0fb5 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
8bc09d tcp_v4_early_demux (/lib/modules/4.3.0+/build/vmlinux)
896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
804c27 cpuidle_enter (/lib/modules/4.3.0+/build/vmlinux)
2ded22 call_cpuidle (/lib/modules/4.3.0+/build/vmlinux)
2defb6 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
1163ffa start_kernel ([kernel.vmlinux].init.text)
11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)

Signed-off-by: Wang Nan <***@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Adrian Hunter <***@intel.com>
Cc: David S. Miller <***@davemloft.net>
Cc: Jiri Olsa <***@redhat.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
Link: http://lkml.kernel.org/r/1449541544-67621-16-git-send-email-***@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/perf/builtin-script.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3c3f8d0..d259e9a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -130,6 +130,18 @@ static struct {

.invalid_fields = PERF_OUTPUT_TRACE,
},
+
+ [PERF_TYPE_BREAKPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+ PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE,
+ },
};

static bool output_set_by_user(void)
@@ -1129,6 +1141,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
type = PERF_TYPE_TRACEPOINT;
else if (!strcmp(str, "raw"))
type = PERF_TYPE_RAW;
+ else if (!strcmp(str, "break"))
+ type = PERF_TYPE_BREAKPOINT;
else {
fprintf(stderr, "Invalid event type in field string.\n");
rc = -EINVAL;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:31:11 UTC
Permalink
bpf__apply_obj_config() is introduced as the core API to apply object
config options to all BPF objects. This patch also does the real work
for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value
stored in map's private field into the BPF map.

This patch is required because we are not always able to set all
BPF config during parsing. Further patch will set events created
by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist
until perf_evsel__open().

bpf_map_foreach_key() is introduced to iterate over each key
needs to be configured. This function would be extended to support
more map types and different key settings.

In perf record, before start recording, call bpf__apply_config() to
turn on all BPF config options.

Test result:

# cat ./test_bpf_map_1.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_ARRAY = 2,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
};
SEC("func=sys_nanosleep")
int func(void *ctx)
{
int key = 0;
char fmt[] = "%d\n";
int *pval = map_lookup_elem(&channel, &key);
if (!pval)
return 0;
bpf_trace_printk(fmt, sizeof(fmt), *pval);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/


# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e './test_bpf_map_1.c/maps:channel.value=11/' usleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 1/1 #P:8
[SNIP]
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
usleep-18593 [007] d... 2394714.395539: : 11
# ./perf record -e './test_bpf_map.c/maps:channel.value=101/' usleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 1/1 #P:8
[SNIP]
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
usleep-18593 [007] d... 2394714.395539: : 11
usleep-19000 [006] d... 2394831.057840: : 101

Signed-off-by: Wang Nan <***@huawei.com>
Signed-off-by: He Kuang <***@huawei.com>
Cc: Alexei Starovoitov <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
Cc: Masami Hiramatsu <***@hitachi.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
---
tools/perf/builtin-record.c | 11 +++
tools/perf/util/bpf-loader.c | 180 +++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/bpf-loader.h | 15 ++++
3 files changed, 206 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 199fc31..8479821 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -32,6 +32,7 @@
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
+#include "util/bpf-loader.h"

#include <unistd.h>
#include <sched.h>
@@ -524,6 +525,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}

+ err = bpf__apply_obj_config();
+ if (err) {
+ char errbuf[BUFSIZ];
+
+ bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+ pr_err("ERROR: Apply config to BPF failed: %s\n",
+ errbuf);
+ goto out_child;
+ }
+
/*
* Normally perf_session__new would do this, but it doesn't have the
* evlist.
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 7d361aa..96fd18b 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -7,6 +7,7 @@

#include <linux/bpf.h>
#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include <linux/err.h>
#include <linux/string.h>
#include "perf.h"
@@ -984,6 +985,178 @@ out:

}

+typedef int (*map_config_func_t)(const char *name, int map_fd,
+ struct bpf_map_def *pdef,
+ struct bpf_map_op *op,
+ void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+ void *arg, const char *name,
+ int map_fd, struct bpf_map_def *pdef,
+ struct bpf_map_op *op)
+{
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < pdef->max_entries; i++) {
+ err = func(name, map_fd, pdef, op, &i, arg);
+ if (err) {
+ pr_debug("ERROR: failed to insert value to %s[%u]\n",
+ name, i);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+ map_config_func_t func,
+ void *arg)
+{
+ int err, map_fd;
+ const char *name;
+ struct bpf_map_op *op;
+ struct bpf_map_def def;
+ struct bpf_map_priv *priv;
+
+ name = bpf_map__get_name(map);
+
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err) {
+ pr_debug("ERROR: failed to get private from map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ if (!priv || list_empty(&priv->ops_list)) {
+ pr_debug("INFO: nothing to config for map %s\n", name);
+ return 0;
+ }
+
+ err = bpf_map__get_def(map, &def);
+ if (err) {
+ pr_debug("ERROR: failed to get definition from map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ map_fd = bpf_map__get_fd(map);
+ if (map_fd < 0) {
+ pr_debug("ERROR: failed to get fd from map %s\n", name);
+ return map_fd;
+ }
+
+ list_for_each_entry(op, &priv->ops_list, list) {
+ switch (def.type) {
+ case BPF_MAP_TYPE_ARRAY:
+ switch (op->key_type) {
+ case BPF_MAP_KEY_ALL:
+ return foreach_key_array_all(func, arg, name,
+ map_fd, &def, op);
+ default:
+ pr_debug("ERROR: keytype for map '%s' invalid\n",
+ name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ default:
+ pr_debug("ERROR: type of '%s' incorrect\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+ }
+
+ return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+ size_t val_size, u64 val)
+{
+ int err = 0;
+
+ switch (val_size) {
+ case 1: {
+ u8 _val = (u8)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 2: {
+ u16 _val = (u16)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 4: {
+ u32 _val = (u32)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 8: {
+ err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+ break;
+ }
+ default:
+ pr_debug("ERROR: invalid value size\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+ }
+ if (err && errno)
+ err = -errno;
+ return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+ struct bpf_map_def *pdef __maybe_unused,
+ struct bpf_map_op *op,
+ void *pkey, void *arg __maybe_unused)
+{
+ int err;
+
+ switch (op->op_type) {
+ case BPF_MAP_OP_SET_VALUE:
+ err = apply_config_value_for_key(map_fd, pkey,
+ pdef->value_size,
+ op->v.value);
+ break;
+ default:
+ pr_debug("ERROR: unknown value type for '%s'\n", name);
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+ return bpf_map_config_foreach_key(map,
+ apply_obj_config_map_for_key,
+ NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int err;
+
+ bpf_map__for_each(map, obj) {
+ err = apply_obj_config_map(map);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+ struct bpf_object *obj, *tmp;
+ int err;
+
+ bpf_object__for_each_safe(obj, tmp) {
+ err = apply_obj_config_object(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -1138,3 +1311,10 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
bpf__strerror_end(buf, size);
return 0;
}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_end(buf, size);
+ return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 2464db9..db3c34c 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -71,6 +71,8 @@ int bpf__strerror_config_obj(struct bpf_object *obj,
struct perf_evlist *evlist,
int *error_pos, int err, char *buf,
size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
#else
static inline struct bpf_object *
bpf__prepare_load(const char *filename __maybe_unused,
@@ -111,6 +113,12 @@ bpf__config_obj(struct bpf_object *obj __maybe_unused,
}

static inline int
+bpf__apply_obj_config(void)
+{
+ return 0;
+}
+
+static inline int
__bpf_strerror(char *buf, size_t size)
{
if (!size)
@@ -156,5 +164,12 @@ bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
{
return __bpf_strerror(buf, size);
}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
#endif
#endif
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Wang Nan
2015-12-08 02:31:26 UTC
Permalink
Namhyung Kim pointed out a potential problem in original code that it
fetches names of maps from section header string table, which is used
to store section names.

Original code doesn't cause error because of a LLVM behavior that, it
combines shstrtab into strtab. For example:

$ echo 'int func() {return 0;}' | x86_64-oe-linux-clang -x c -o temp.o -c -
$ readelf -h ./temp.o
ELF Header:
Magic: 7f 45 4c 46 02 01 01 03 00 00 00 00 00 00 00 00
...
Section header string table index: 1
$ readelf -S ./temp.o
There are 10 section headers, starting at offset 0x288:

Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .strtab STRTAB 0000000000000000 00000230
0000000000000051 0000000000000000 0 0 1
...
$ readelf -p .strtab ./temp.o

String dump of section '.strtab':
[ 1] .text
[ 7] .comment
[ 10] .bss
[ 15] .note.GNU-stack
[ 25] .rela.eh_frame
[ 34] func
[ 39] .strtab
[ 41] .symtab
[ 49] .data
[ 4f] -

$ readelf -p .shstrtab ./temp.o
readelf: Warning: Section '.shstrtab' was not dumped because it does not exist!

Where, 'section header string table index' points to '.strtab', and
symbol names are also stored there.

However, in case of gcc:

$ echo 'int func() {return 0;}' | gcc -x c -o temp.o -c -
$ readelf -p .shstrtab ./temp.o

String dump of section '.shstrtab':
[ 1] .symtab
[ 9] .strtab
[ 11] .shstrtab
[ 1b] .text
[ 21] .data
[ 27] .bss
[ 2c] .comment
[ 35] .note.GNU-stack
[ 45] .rela.eh_frame
$ readelf -p .strtab ./temp.o

String dump of section '.strtab':
[ 1] func

They are separated sections.

Although original code doesn't cause error, we'd better use canonical
method for fetching symbol names to avoid potential behavior changing.
This patch learns from readelf's code, fetches string from sh_link
of .symbol section.

Signed-off-by: Wang Nan <***@huawei.com>
Acked-by: Namhyung Kim <***@kernel.org>
Cc: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/lib/bpf/libbpf.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 16485ab..8334a5a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -195,6 +195,7 @@ struct bpf_object {
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ size_t strtabidx;
struct {
GElf_Shdr shdr;
Elf_Data *data;
@@ -547,7 +548,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
continue;

map_name = elf_strptr(obj->efile.elf,
- obj->efile.ehdr.e_shstrndx,
+ obj->efile.strtabidx,
sym.st_name);
map_idx = sym.st_value / sizeof(struct bpf_map_def);
if (map_idx >= obj->nr_maps) {
@@ -630,8 +631,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
err = -LIBBPF_ERRNO__FORMAT;
- } else
+ } else {
obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ }
} else if ((sh.sh_type == SHT_PROGBITS) &&
(sh.sh_flags & SHF_EXECINSTR) &&
(data->d_size > 0)) {
@@ -667,6 +670,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
goto out;
}

+ if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
+ pr_warning("Corrupted ELF file: index of strtab invalid\n");
+ return LIBBPF_ERRNO__FORMAT;
+ }
if (maps_shndx >= 0)
err = bpf_object__init_maps_name(obj, maps_shndx);
out:
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
tip-bot for Wang Nan
2015-12-14 08:38:46 UTC
Permalink
Commit-ID: 77ba9a5b48a7c742f9a46d26596852e9cfec7900
Gitweb: http://git.kernel.org/tip/77ba9a5b48a7c742f9a46d26596852e9cfec7900
Author: Wang Nan <***@huawei.com>
AuthorDate: Tue, 8 Dec 2015 02:25:30 +0000
Committer: Arnaldo Carvalho de Melo <***@redhat.com>
CommitDate: Fri, 11 Dec 2015 08:53:04 -0300

tools lib bpf: Fetch map names from correct strtab

Namhyung Kim pointed out a potential problem in original code that it
fetches names of maps from section header string table, which is used
to store section names.

Original code doesn't cause error because of a LLVM behavior that, it
combines shstrtab into strtab. For example:

$ echo 'int func() {return 0;}' | x86_64-oe-linux-clang -x c -o temp.o -c -
$ readelf -h ./temp.o
ELF Header:
Magic: 7f 45 4c 46 02 01 01 03 00 00 00 00 00 00 00 00
...
Section header string table index: 1
$ readelf -S ./temp.o
There are 10 section headers, starting at offset 0x288:

Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .strtab STRTAB 0000000000000000 00000230
0000000000000051 0000000000000000 0 0 1
...
$ readelf -p .strtab ./temp.o

String dump of section '.strtab':
[ 1] .text
[ 7] .comment
[ 10] .bss
[ 15] .note.GNU-stack
[ 25] .rela.eh_frame
[ 34] func
[ 39] .strtab
[ 41] .symtab
[ 49] .data
[ 4f] -

$ readelf -p .shstrtab ./temp.o
readelf: Warning: Section '.shstrtab' was not dumped because it does not exist!

Where, 'section header string table index' points to '.strtab', and
symbol names are also stored there.

However, in case of gcc:

$ echo 'int func() {return 0;}' | gcc -x c -o temp.o -c -
$ readelf -p .shstrtab ./temp.o

String dump of section '.shstrtab':
[ 1] .symtab
[ 9] .strtab
[ 11] .shstrtab
[ 1b] .text
[ 21] .data
[ 27] .bss
[ 2c] .comment
[ 35] .note.GNU-stack
[ 45] .rela.eh_frame
$ readelf -p .strtab ./temp.o

String dump of section '.strtab':
[ 1] func

They are separated sections.

Although original code doesn't cause error, we'd better use canonical
method for fetching symbol names to avoid potential behavior changing.
This patch learns from readelf's code, fetches string from sh_link
of .symbol section.

Signed-off-by: Wang Nan <***@huawei.com>
Reported-and-Acked-by: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
Link: http://lkml.kernel.org/r/1449541544-67621-3-git-send-email-***@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/lib/bpf/libbpf.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 16485ab..8334a5a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -195,6 +195,7 @@ struct bpf_object {
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ size_t strtabidx;
struct {
GElf_Shdr shdr;
Elf_Data *data;
@@ -547,7 +548,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
continue;

map_name = elf_strptr(obj->efile.elf,
- obj->efile.ehdr.e_shstrndx,
+ obj->efile.strtabidx,
sym.st_name);
map_idx = sym.st_value / sizeof(struct bpf_map_def);
if (map_idx >= obj->nr_maps) {
@@ -630,8 +631,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
err = -LIBBPF_ERRNO__FORMAT;
- } else
+ } else {
obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ }
} else if ((sh.sh_type == SHT_PROGBITS) &&
(sh.sh_flags & SHF_EXECINSTR) &&
(data->d_size > 0)) {
@@ -667,6 +670,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
goto out;
}

+ if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
+ pr_warning("Corrupted ELF file: index of strtab invalid\n");
+ return LIBBPF_ERRNO__FORMAT;
+ }
if (maps_shndx >= 0)
err = bpf_object__init_maps_name(obj, maps_shndx);
out:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
tip-bot for Wang Nan
2015-12-14 08:38:39 UTC
Permalink
Commit-ID: 973170e66726672518eb935eb0dc0e63876d133d
Gitweb: http://git.kernel.org/tip/973170e66726672518eb935eb0dc0e63876d133d
Author: Wang Nan <***@huawei.com>
AuthorDate: Tue, 8 Dec 2015 02:25:29 +0000
Committer: Arnaldo Carvalho de Melo <***@redhat.com>
CommitDate: Fri, 11 Dec 2015 08:51:09 -0300

tools lib bpf: Check return value of strdup when reading map names

Commit 561bbccac72d08babafaa33fd7fa9100ec4c9fb6 ("tools lib bpf:
Extract and collect map names from BPF object file") forgets checking
return value of strdup(). This patch fixes it. It also checks names
pointer before strcmp() for safety.

Signed-off-by: Wang Nan <***@huawei.com>
Acked-by: Namhyung Kim <***@kernel.org>
Cc: Zefan Li <***@huawei.com>
Cc: ***@163.com
Fixes: 561bbccac72d ("tools lib bpf: Extract and collect map names from BPF object file")
Link: http://lkml.kernel.org/r/1449541544-67621-2-git-send-email-***@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>
---
tools/lib/bpf/libbpf.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a298614..16485ab 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -527,14 +527,14 @@ bpf_object__init_maps(struct bpf_object *obj, void *data,
return 0;
}

-static void
+static int
bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
{
int i;
Elf_Data *symbols = obj->efile.symbols;

if (!symbols || maps_shndx < 0)
- return;
+ return -EINVAL;

for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
GElf_Sym sym;
@@ -556,9 +556,14 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
continue;
}
obj->maps[map_idx].name = strdup(map_name);
+ if (!obj->maps[map_idx].name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
pr_debug("map %zu is \"%s\"\n", map_idx,
obj->maps[map_idx].name);
}
+ return 0;
}

static int bpf_object__elf_collect(struct bpf_object *obj)
@@ -663,7 +668,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
}

if (maps_shndx >= 0)
- bpf_object__init_maps_name(obj, maps_shndx);
+ err = bpf_object__init_maps_name(obj, maps_shndx);
out:
return err;
}
@@ -1372,7 +1377,7 @@ bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
struct bpf_map *pos;

bpf_map__for_each(pos, obj) {
- if (strcmp(pos->name, name) == 0)
+ if (pos->name && !strcmp(pos->name, name))
return pos;
}
return NULL;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Loading...