Discussion:
[PATCH 0/9] perf/jit: various improvements
(too old to reply)
Stephane Eranian
2016-10-13 11:01:41 UTC
Permalink
From: Stephane Eranian <***@gmail.com>

This patch series provides several important enhancements to the perf jit code
profiling support. The following is changed:
- enable building pref jitdump support without DWARF (Maciej)
- Add Exception Handling (EH) Frame record type to support unwinding
- remove useless padding of data in jitdump file
- provide a specification document for the jitdump format to help jit runtime developers add support

Note that the runtime needs to emit the unwind information. This is not supported for openJDK via JVMTI.

Thanks to Maciej, Stefano, and Ross for their contributions.

Maciej Debski (1):
perf/jit: enable jitdump support without dwarf

Stefano Sanfilippo (5):
perf/jit: make perf skip unknown records
perf/jit: do not assume pgoff is zero
perf/jit: add unwinding support
perf/jit: generate .eh_frame/.eh_frame_hdr in DSO
perf/jit: Check JITHEADER_VERSION

Stephane Eranian (3):
perf/jit: improve error messages from JVMTI
perf/jit: remove unecessary padding in jitdump file
perf/jit: add jitdump format specification document

tools/perf/Documentation/jitdump-specification.txt | 170 +++++++++++++++++++++
tools/perf/Makefile.config | 2 -
tools/perf/jvmti/jvmti_agent.c | 38 +----
tools/perf/jvmti/libjvmti.c | 37 +++--
tools/perf/util/Build | 2 +-
tools/perf/util/genelf.c | 109 ++++++++++++-
tools/perf/util/genelf.h | 5 +-
tools/perf/util/jitdump.c | 80 ++++++++--
tools/perf/util/jitdump.h | 12 ++
tools/perf/util/unwind-libunwind-local.c | 4 +-
10 files changed, 393 insertions(+), 66 deletions(-)
create mode 100644 tools/perf/Documentation/jitdump-specification.txt
--
1.9.1
Stephane Eranian
2016-10-13 11:01:51 UTC
Permalink
From: Stefano Sanfilippo <***@chromium.org>

When calculating .eh_frame_hdr base and LUT offsets do not
always assume that pgoff is zero.

The assumption is false for DSOs built from the jitdump by
perf inject, because the ELF header did not exist in memory at
sampling time.

Signed-off-by: Stefano Sanfilippo <***@chromium.org>
Signed-off-by: Ross McIlroy <***@chromium.org>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/util/unwind-libunwind-local.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 20c2e5743903..6fec84dff3f7 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -357,8 +357,8 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
di.start_ip = map->start;
di.end_ip = map->end;
- di.u.rti.segbase = map->start + segbase;
- di.u.rti.table_data = map->start + table_data;
+ di.u.rti.segbase = map->start + segbase - map->pgoff;
+ di.u.rti.table_data = map->start + table_data - map->pgoff;
di.u.rti.table_len = fde_count * sizeof(struct table_entry)
/ sizeof(unw_word_t);
ret = dwarf_search_unwind_table(as, ip, &di, pi,
--
1.9.1
Stephane Eranian
2016-10-13 11:02:02 UTC
Permalink
From: Stefano Sanfilippo <***@chromium.org>

Check the version number when opening a jitdump file.
Accept older versions, but not newer ones.

Signed-off-by: Stefano Sanfilippo <***@chromium.org>
Signed-off-by: Ross McIlroy <***@chromium.org>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/util/jitdump.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index c47fba63274b..c5dabb3a81fd 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -180,6 +180,12 @@ jit_open(struct jit_buf_desc *jd, const char *name)
header.elf_mach,
jd->use_arch_timestamp);

+ if (header.version > JITHEADER_VERSION) {
+ pr_err("wrong jitdump version %u, expected " STR(JITHEADER_VERSION),
+ header.version);
+ goto error;
+ }
+
if (header.flags & JITDUMP_FLAGS_RESERVED) {
pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
(unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
--
1.9.1
Stephane Eranian
2016-10-13 11:02:10 UTC
Permalink
From: Stefano Sanfilippo <***@chromium.org>

The behaviour before this commit was to skip the remaining
portion of the jitdump in case an unknown record was found,
including those records that perf could handle.

With this change, parsing a record with an unknown id will
cause a warning to be emitted, the record will be skipped
and parsing will resume from the next (valid) one.

The patch aims at making perf more future proof, by extracting
as much information as possible from jitdumps.

Signed-off-by: Stefano Sanfilippo <***@chromium.org>
Signed-off-by: Ross McIlroy <***@chromium.org>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/util/jitdump.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 95f0884aae02..5db2feb90060 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -263,8 +263,7 @@ jit_get_next_entry(struct jit_buf_desc *jd)
return NULL;

if (id >= JIT_CODE_MAX) {
- pr_warning("next_entry: unknown prefix %d, skipping\n", id);
- return NULL;
+ pr_warning("next_entry: unknown record type %d, skipping\n", id);
}
if (bs > jd->bufsize) {
void *n;
@@ -322,7 +321,8 @@ jit_get_next_entry(struct jit_buf_desc *jd)
break;
case JIT_CODE_MAX:
default:
- return NULL;
+ /* skip unknown record (we have read them) */
+ break;
}
return jr;
}
--
1.9.1
Stephane Eranian
2016-10-13 11:02:23 UTC
Permalink
This patch improves the usefulness of error messages
generated by the JVMTI interfac.e This can help identify
the root cause of a problem by printing the actual error
code. The patch adds a new helper function called print_error().

Signed-off-by: Stephane Eranian <***@google.com>
---
tools/perf/jvmti/libjvmti.c | 37 +++++++++++++++++++++++++++----------
1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index ac12e4b91a92..2d9bc04b79a8 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -12,6 +12,17 @@
static int has_line_numbers;
void *jvmti_agent;

+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+ char *err_msg = NULL;
+ jvmtiError err;
+ err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg);
+ if (err == JVMTI_ERROR_NONE) {
+ warnx("%s failed with %s", msg, err_msg);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
+ }
+}
+
static jvmtiError
do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
jvmti_line_info_t *tab, jint *nr)
@@ -22,8 +33,10 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
jvmtiError ret;

ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
- if (ret != JVMTI_ERROR_NONE)
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetLineNumberTable", ret);
return ret;
+ }

for (i = 0; i < nr_lines; i++) {
if (loc_tab[i].start_location < bci) {
@@ -71,6 +84,8 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
/* free what was allocated for nothing */
(*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
nr_total += (int)nr;
+ } else {
+ print_error(jvmti, "GetLineNumberTable", ret);
}
}
}
@@ -130,7 +145,7 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
&decl_class);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: cannot get declaring class");
+ print_error(jvmti, "GetMethodDeclaringClass", ret);
return;
}

@@ -144,21 +159,21 @@ compiled_method_load_cb(jvmtiEnv *jvmti,

ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: cannot get source filename ret=%d", ret);
+ print_error(jvmti, "GetSourceFileName", ret);
goto error;
}

ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
&class_sign, NULL);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: getclassignature failed");
+ print_error(jvmti, "GetClassSignature", ret);
goto error;
}

ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
&func_sign, NULL);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: failed getmethodname");
+ print_error(jvmti, "GetMethodName", ret);
goto error;
}

@@ -253,7 +268,7 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)

ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: acquire compiled_method capability failed");
+ print_error(jvmti, "AddCapabilities", ret);
return -1;
}
ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
@@ -264,7 +279,9 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
if (ret == JVMTI_ERROR_NONE)
has_line_numbers = 1;
- }
+ } else if (ret != JVMTI_ERROR_NONE)
+ print_error(jvmti, "GetJLocationFormat", ret);
+

memset(&cb, 0, sizeof(cb));

@@ -273,21 +290,21 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)

ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: cannot set event callbacks");
+ print_error(jvmti, "SetEventCallbacks", ret);
return -1;
}

ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: setnotification failed for method_load");
+ print_error(jvmti, "SetEventNotificationMode(METHOD_LOAD)", ret);
return -1;
}

ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: setnotification failed on code_generated");
+ print_error(jvmti, "SetEventNotificationMode(CODE_GENERATED)", ret);
return -1;
}
return 0;
--
1.9.1
Nilay Vaish
2016-10-13 20:22:18 UTC
Permalink
Post by Stephane Eranian
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index ac12e4b91a92..2d9bc04b79a8 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -12,6 +12,17 @@
static int has_line_numbers;
void *jvmti_agent;
+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+ char *err_msg = NULL;
+ jvmtiError err;
+ err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg);
+ if (err == JVMTI_ERROR_NONE) {
+ warnx("%s failed with %s", msg, err_msg);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
+ }
+}
Do we not need to release the memory for err_msg if the condition for
the 'if' statement evaluates to false? Is it that we are going to
kill the process, so no need to release the memory?

--
Nilay
Arnaldo Carvalho de Melo
2016-10-14 11:15:19 UTC
Permalink
Post by Nilay Vaish
Post by Stephane Eranian
+++ b/tools/perf/jvmti/libjvmti.c
@@ -12,6 +12,17 @@
+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+ char *err_msg = NULL;
+ jvmtiError err;
+ err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg);
+ if (err == JVMTI_ERROR_NONE) {
+ warnx("%s failed with %s", msg, err_msg);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
+ }
+}
Do we not need to release the memory for err_msg if the condition for
the 'if' statement evaluates to false? Is it that we are going to
kill the process, so no need to release the memory?
I guess that print_error() is called only when an error was returned
somewhere, that ret parameter, then if there was no error
(JVMTI_ERROR_NONE) in translating that numeric code to an string,
err_msg, it can then be used with warnx() (the main purpose of
print_error()) and then deallocated.

For err != JVMTI_ERROR_NONE it silently goes back to the caller that
expected it to print something.

I.e. probably it should have an else clause, something like:

if (err == JVMTI_ERROR_NONE) {
warnx("%s failed with %s", msg, err_msg);
(*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
} else {
warnx("%s failed with an unknown error %d", msg, (int)ret);
}

Stephane?

- Arnaldo
Stephane Eranian
2016-10-14 13:27:22 UTC
Permalink
On Fri, Oct 14, 2016 at 4:13 AM, Arnaldo Carvalho de Melo
Post by Arnaldo Carvalho de Melo
Post by Nilay Vaish
Post by Stephane Eranian
+++ b/tools/perf/jvmti/libjvmti.c
@@ -12,6 +12,17 @@
+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+ char *err_msg = NULL;
+ jvmtiError err;
+ err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg);
+ if (err == JVMTI_ERROR_NONE) {
+ warnx("%s failed with %s", msg, err_msg);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
+ }
+}
Do we not need to release the memory for err_msg if the condition for
the 'if' statement evaluates to false? Is it that we are going to
kill the process, so no need to release the memory?
I guess that print_error() is called only when an error was returned
somewhere, that ret parameter, then if there was no error
(JVMTI_ERROR_NONE) in translating that numeric code to an string,
err_msg, it can then be used with warnx() (the main purpose of
print_error()) and then deallocated.
For err != JVMTI_ERROR_NONE it silently goes back to the caller that
expected it to print something.
if (err == JVMTI_ERROR_NONE) {
warnx("%s failed with %s", msg, err_msg);
(*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
} else {
warnx("%s failed with an unknown error %d", msg, (int)ret);
}
Stephane?
'I will fix all of the comments over the week-end. I am away from office today.
- Arnaldo
Stephane Eranian
2016-10-13 11:02:34 UTC
Permalink
From: Maciej Debski <***@google.com>

This patch modifies the build dependencies on the jitdump
support in perf. As it stands jitdump was wrongfully made dependent
100% on using DWARF. However, the dwarf dependency, only exist if
generating the source line table in genelf_debug.c. The rest of the
support does not need DWARF.

This patch removes the dependency on DWARF for the entire jitdump
support. It keeps it only for the genelf_debug.c support.

Signed-off-by: Maciej Debski <***@google.com>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/Makefile.config | 2 --
tools/perf/util/Build | 2 +-
tools/perf/util/genelf.c | 9 +++++++--
tools/perf/util/genelf.h | 2 ++
4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 72edf83d76b7..8cfc310d4358 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,10 +366,8 @@ ifndef NO_SDT
endif

ifdef PERF_HAVE_JITDUMP
- ifndef NO_DWARF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
- endif
endif

ifeq ($(ARCH),powerpc)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eb60e613d795..1dc67efad634 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -120,7 +120,7 @@ libperf-y += demangle-rust.o
ifdef CONFIG_JITDUMP
libperf-$(CONFIG_LIBELF) += jitdump.o
libperf-$(CONFIG_LIBELF) += genelf.o
-libperf-$(CONFIG_LIBELF) += genelf_debug.o
+libperf-$(CONFIG_DWARF) += genelf_debug.o
endif

CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c1ef805c6a8f..14a73acc549c 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -19,7 +19,9 @@
#include <limits.h>
#include <fcntl.h>
#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
#include <dwarf.h>
+#endif

#include "perf.h"
#include "genelf.h"
@@ -157,7 +159,7 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod
int
jit_write_elf(int fd, uint64_t load_addr, const char *sym,
const void *code, int csize,
- void *debug, int nr_debug_entries)
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused)
{
Elf *e;
Elf_Data *d;
@@ -386,11 +388,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_size = sizeof(bnote);
shdr->sh_entsize = 0;

+#ifdef HAVE_DWARF_SUPPORT
if (debug && nr_debug_entries) {
retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
if (retval)
goto error;
- } else {
+ } else
+#endif
+ {
if (elf_update(e, ELF_C_WRITE) < 0) {
warnx("elf_update 4 failed");
goto error;
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 2fbeb59c4bdd..5c933ac71451 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -4,8 +4,10 @@
/* genelf.c */
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
const void *code, int csize, void *debug, int nr_debug_entries);
+#ifdef HAVE_DWARF_SUPPORT
/* genelf_debug.c */
int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif

#if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM
--
1.9.1
Arnaldo Carvalho de Melo
2016-10-13 18:17:36 UTC
Permalink
Post by Stephane Eranian
This patch modifies the build dependencies on the jitdump
support in perf. As it stands jitdump was wrongfully made dependent
100% on using DWARF. However, the dwarf dependency, only exist if
generating the source line table in genelf_debug.c. The rest of the
support does not need DWARF.
This patch removes the dependency on DWARF for the entire jitdump
support. It keeps it only for the genelf_debug.c support.
Please add a:

Fixes: e12b202f8fb9 ("perf jitdump: Build only on supported archs")

Next time, so that scripts can pick patches for stable kernels, etc.

Adding it this time.

Thanks,

- Arnaldo
Post by Stephane Eranian
---
tools/perf/Makefile.config | 2 --
tools/perf/util/Build | 2 +-
tools/perf/util/genelf.c | 9 +++++++--
tools/perf/util/genelf.h | 2 ++
4 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 72edf83d76b7..8cfc310d4358 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,10 +366,8 @@ ifndef NO_SDT
endif
ifdef PERF_HAVE_JITDUMP
- ifndef NO_DWARF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
- endif
endif
ifeq ($(ARCH),powerpc)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eb60e613d795..1dc67efad634 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -120,7 +120,7 @@ libperf-y += demangle-rust.o
ifdef CONFIG_JITDUMP
libperf-$(CONFIG_LIBELF) += jitdump.o
libperf-$(CONFIG_LIBELF) += genelf.o
-libperf-$(CONFIG_LIBELF) += genelf_debug.o
+libperf-$(CONFIG_DWARF) += genelf_debug.o
endif
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c1ef805c6a8f..14a73acc549c 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -19,7 +19,9 @@
#include <limits.h>
#include <fcntl.h>
#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
#include <dwarf.h>
+#endif
#include "perf.h"
#include "genelf.h"
@@ -157,7 +159,7 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod
int
jit_write_elf(int fd, uint64_t load_addr, const char *sym,
const void *code, int csize,
- void *debug, int nr_debug_entries)
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused)
{
Elf *e;
Elf_Data *d;
@@ -386,11 +388,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_size = sizeof(bnote);
shdr->sh_entsize = 0;
+#ifdef HAVE_DWARF_SUPPORT
if (debug && nr_debug_entries) {
retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
if (retval)
goto error;
- } else {
+ } else
+#endif
+ {
if (elf_update(e, ELF_C_WRITE) < 0) {
warnx("elf_update 4 failed");
goto error;
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 2fbeb59c4bdd..5c933ac71451 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -4,8 +4,10 @@
/* genelf.c */
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
const void *code, int csize, void *debug, int nr_debug_entries);
+#ifdef HAVE_DWARF_SUPPORT
/* genelf_debug.c */
int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
#if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM
--
1.9.1
Arnaldo Carvalho de Melo
2016-10-13 18:51:54 UTC
Permalink
I'm trying to figure this out...
So, building with:

make -C tools/perf LDFLAGS=-static O=/tmp/build/perf install-bin

I get:

[***@jouet linux]$ grep libelf /tmp/build/perf/FEATURE-DUMP
feature-libelf=0
feature-libelf-getphdrnum=0
feature-libelf-gelf_getnote=0
feature-libelf-getshdrstrndx=0
feature-libelf-mmap=0
[***@jouet linux]$

And jitdump is only linked if:

[***@jouet linux]$ grep jitdump tools/perf/*/Build
tools/perf/util/Build:libperf-$(CONFIG_LIBELF) += jitdump.o
[***@jouet linux]$

But:

#ifdef HAVE_JITDUMP
static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
.
.
.
ret = jit_process(inject->session, &inject->output, machine,
event->mmap.filename, sample->pid, &n);
.
.
.
#endif

So we need:

[***@jouet linux]$ git diff
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8cfc310d4358..cffdd9cf3ebf 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,8 +366,10 @@ ifndef NO_SDT
endif

ifdef PERF_HAVE_JITDUMP
+ ifndef NO_LIBELF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
+ endif
endif

ifeq ($(ARCH),powerpc)
[***@jouet linux]$

To get that fixed.

Please let me know if this suits your needs.

I'll now try building with elfutils static libraries to see if in that case it
all gets linked.

- Arnaldo
Arnaldo Carvalho de Melo
2016-10-13 19:12:46 UTC
Permalink
Post by Stephane Eranian
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8cfc310d4358..cffdd9cf3ebf 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,8 +366,10 @@ ifndef NO_SDT
endif
ifdef PERF_HAVE_JITDUMP
+ ifndef NO_LIBELF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
+ endif
endif
ifeq ($(ARCH),powerpc)
To get that fixed.
Please let me know if this suits your needs.
I'll now try building with elfutils static libraries to see if in that case it
all gets linked.
After installing these packages, on fedora, i.e. installing what is
needed to statically link against libelf:

elfutils-devel-static
elfutils-libelf-devel-static
zlib-static

Then it all works, libelf is statically linked and we also statically
link the jitdump code:

$ make -C tools/perf LDFLAGS=-static O=/tmp/build/perf install-bin
$ grep libelf /tmp/build/perf/FEATURE-DUMP
feature-libelf=1
feature-libelf-getphdrnum=1
feature-libelf-gelf_getnote=1
feature-libelf-getshdrstrndx=1
feature-libelf-mmap=1
$

And:

[***@jouet linux]$ size /tmp/build/perf/perf
text data bss dec hex filename
4478822 581702 23919240 28979764 1ba3234 /tmp/build/perf/perf
[***@jouet linux]$
[***@jouet linux]$ file /tmp/build/perf/perf
/tmp/build/perf/perf: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), statically linked, for GNU/Linux 2.6.32, BuildID[sha1]=c2950590a0f24d76baa0ce4896414e5d523fefba, not stripped
[***@jouet linux]$
[***@jouet linux]$ nm /tmp/build/perf/perf | grep jit
00000000005007f0 T jit_process
0000000000501920 T jit_write_elf
0000000000431a50 t perf_event__jit_repipe_mmap
0000000000431b20 t perf_event__jit_repipe_mmap2
[***@jouet linux]$

- Arnaldo
Arnaldo Carvalho de Melo
2016-10-13 19:44:42 UTC
Permalink
Post by Arnaldo Carvalho de Melo
Post by Stephane Eranian
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8cfc310d4358..cffdd9cf3ebf 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,8 +366,10 @@ ifndef NO_SDT
endif
ifdef PERF_HAVE_JITDUMP
+ ifndef NO_LIBELF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
+ endif
endif
ifeq ($(ARCH),powerpc)
To get that fixed.
Please let me know if this suits your needs.
I'll now try building with elfutils static libraries to see if in that case it
all gets linked.
After installing these packages, on fedora, i.e. installing what is
elfutils-devel-static
elfutils-libelf-devel-static
zlib-static
Then it all works, libelf is statically linked and we also statically
$ make -C tools/perf LDFLAGS=-static O=/tmp/build/perf install-bin
$ grep libelf /tmp/build/perf/FEATURE-DUMP
feature-libelf=1
feature-libelf-getphdrnum=1
feature-libelf-gelf_getnote=1
feature-libelf-getshdrstrndx=1
feature-libelf-mmap=1
$
text data bss dec hex filename
4478822 581702 23919240 28979764 1ba3234 /tmp/build/perf/perf
/tmp/build/perf/perf: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), statically linked, for GNU/Linux 2.6.32, BuildID[sha1]=c2950590a0f24d76baa0ce4896414e5d523fefba, not stripped
00000000005007f0 T jit_process
0000000000501920 T jit_write_elf
0000000000431a50 t perf_event__jit_repipe_mmap
0000000000431b20 t perf_event__jit_repipe_mmap2
Ok, now this cset is bisected as causing this problem when built on Alpine Linux 3.4:

CC /tmp/build/perf/util/demangle-java.o
CC /tmp/build/perf/util/demangle-rust.o
CC /tmp/build/perf/util/jitdump.o
CC /tmp/build/perf/util/genelf.o
util/jitdump.c: In function 'jit_process':
util/jitdump.c:622:3: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized]
fprintf(stderr, "injected: %s (%d)\n", path, ret);
^
util/jitdump.c:584:6: note: 'ret' was declared here
int ret;
^
FLEX /tmp/build/perf/util/parse-events-flex.c


/ $ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-alpine-linux-musl/5.3.0/lto-wrapper
Target: x86_64-alpine-linux-musl
Configured with: /home/buildozer/aports/main/gcc/src/gcc-5.3.0/configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl --target=x86_64-alpine-linux-musl --with-pkgversion='Alpine 5.3.0' --enable-checking=release --disable-fixed-point --disable-libstdcxx-pch --disable-multilib --disable-nls --disable-werror --disable-symvers --enable-__cxa_atexit --enable-esp --enable-cloog-backend --enable-languages=c,c++,objc,java,fortran,ada --disable-libssp --disable-libmudflap --disable-libsanitizer --enable-shared --enable-threads --enable-tls --with-system-zlib
Thread model: posix
gcc version 5.3.0 (Alpine 5.3.0)
/ $

Investigating...

- Arnaldo
Arnaldo Carvalho de Melo
2016-10-13 19:31:53 UTC
Permalink
Post by Arnaldo Carvalho de Melo
Post by Stephane Eranian
This patch modifies the build dependencies on the jitdump
support in perf. As it stands jitdump was wrongfully made dependent
100% on using DWARF. However, the dwarf dependency, only exist if
generating the source line table in genelf_debug.c. The rest of the
support does not need DWARF.
This patch removes the dependency on DWARF for the entire jitdump
support. It keeps it only for the genelf_debug.c support.
Fixes: e12b202f8fb9 ("perf jitdump: Build only on supported archs")
Next time, so that scripts can pick patches for stable kernels, etc.
Adding it this time.
But, this introduces a bug, breaking the perf's static build:

[***@jouet linux]$ git bisect bad
Bisecting: 0 revisions left to test after this (roughly 0 steps)
[f5722b101fb37339001e9524262d3eb08b7b1bfe] perf jit: Enable jitdump support without dwarf
[***@jouet linux]$ rm -rf /tmp/build/perf/ ; mkdir -p /tmp/build/perf ; make -C tools/perf LDFLAGS=-static O=/tmp/build/perf install-bin

<big SNIP>

/tmp/build/perf/perf-in.o: In function `perf_event__jit_repipe_mmap2':
/home/acme/git/linux/tools/perf/builtin-inject.c:298: undefined reference to `jit_process'
/tmp/build/perf/perf-in.o: In function `perf_event__jit_repipe_mmap':
/home/acme/git/linux/tools/perf/builtin-inject.c:260: undefined reference to `jit_process'
collect2: error: ld returned 1 exit status
Makefile.perf:470: recipe for target '/tmp/build/perf/perf' failed
make[1]: *** [/tmp/build/perf/perf] Error 1
Makefile:108: recipe for target 'install-bin' failed
make: *** [install-bin] Error 2
make: Leaving directory '/home/acme/git/linux/tools/perf'
[***@jouet linux]$ git bisect bad
f5722b101fb37339001e9524262d3eb08b7b1bfe is the first bad commit
commit f5722b101fb37339001e9524262d3eb08b7b1bfe
Author: Maciej Debski <***@google.com>
Date: Thu Oct 13 03:59:36 2016 -0700

perf jit: Enable jitdump support without dwarf

This patch modifies the build dependencies on the jitdump support in
perf. As it stands jitdump was wrongfully made dependent 100% on using
DWARF. However, the dwarf dependency, only exist if generating the
source line table in genelf_debug.c. The rest of the support does not
need DWARF.

This patch removes the dependency on DWARF for the entire jitdump
support. It keeps it only for the genelf_debug.c support.

Signed-off-by: Maciej Debski <***@google.com>
Reviewed-by: Stephane Eranian <***@google.com>
Cc: Anton Blanchard <***@ozlabs.org>
Cc: Jiri Olsa <***@redhat.com>
Cc: Namhyung Kim <***@kernel.org>
Cc: Peter Zijlstra <***@infradead.org>
Link: http://lkml.kernel.org/r/1476356383-30100-3-git-send-email-***@google.com
Fixes: e12b202f8fb9 ("perf jitdump: Build only on supported archs")
Signed-off-by: Arnaldo Carvalho de Melo <***@redhat.com>

:040000 040000 281764609ccfd745f984caae6a331b5b9ebd15b0 4f1ebed718481cacdd901f28a05b347a135e46cf M tools
[***@jouet linux]$

Please use:

make -C tools/perf build-test

That will do may build tests, including this one, the static build.

I'm trying to figure this out...

- Arnaldo
Post by Arnaldo Carvalho de Melo
Thanks,
- Arnaldo
Post by Stephane Eranian
---
tools/perf/Makefile.config | 2 --
tools/perf/util/Build | 2 +-
tools/perf/util/genelf.c | 9 +++++++--
tools/perf/util/genelf.h | 2 ++
4 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 72edf83d76b7..8cfc310d4358 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -366,10 +366,8 @@ ifndef NO_SDT
endif
ifdef PERF_HAVE_JITDUMP
- ifndef NO_DWARF
$(call detected,CONFIG_JITDUMP)
CFLAGS += -DHAVE_JITDUMP
- endif
endif
ifeq ($(ARCH),powerpc)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eb60e613d795..1dc67efad634 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -120,7 +120,7 @@ libperf-y += demangle-rust.o
ifdef CONFIG_JITDUMP
libperf-$(CONFIG_LIBELF) += jitdump.o
libperf-$(CONFIG_LIBELF) += genelf.o
-libperf-$(CONFIG_LIBELF) += genelf_debug.o
+libperf-$(CONFIG_DWARF) += genelf_debug.o
endif
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c1ef805c6a8f..14a73acc549c 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -19,7 +19,9 @@
#include <limits.h>
#include <fcntl.h>
#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
#include <dwarf.h>
+#endif
#include "perf.h"
#include "genelf.h"
@@ -157,7 +159,7 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod
int
jit_write_elf(int fd, uint64_t load_addr, const char *sym,
const void *code, int csize,
- void *debug, int nr_debug_entries)
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused)
{
Elf *e;
Elf_Data *d;
@@ -386,11 +388,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_size = sizeof(bnote);
shdr->sh_entsize = 0;
+#ifdef HAVE_DWARF_SUPPORT
if (debug && nr_debug_entries) {
retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
if (retval)
goto error;
- } else {
+ } else
+#endif
+ {
if (elf_update(e, ELF_C_WRITE) < 0) {
warnx("elf_update 4 failed");
goto error;
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 2fbeb59c4bdd..5c933ac71451 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -4,8 +4,10 @@
/* genelf.c */
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
const void *code, int csize, void *debug, int nr_debug_entries);
+#ifdef HAVE_DWARF_SUPPORT
/* genelf_debug.c */
int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
#if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM
--
1.9.1
Stephane Eranian
2016-10-13 11:02:44 UTC
Permalink
This patch adds a formal specification of the jitdump format. The goal is
to help jit runtime developers implement the jitdump support without having
to read the jvmti code.

Signed-off-by: Stephane Eranian <***@google.com>
---
tools/perf/Documentation/jitdump-specification.txt | 170 +++++++++++++++++++++
1 file changed, 170 insertions(+)
create mode 100644 tools/perf/Documentation/jitdump-specification.txt

diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt
new file mode 100644
index 000000000000..f32397dbcb29
--- /dev/null
+++ b/tools/perf/Documentation/jitdump-specification.txt
@@ -0,0 +1,170 @@
+JITDUMP specification version 2
+Last Revised: 09/15/2016
+Author: Stephane Eranian <***@gmail.com>
+
+--------------------------------------------------------
+| Revision | Date | Description |
+--------------------------------------------------------
+| 1 | 09/07/2016 | Initial revision |
+--------------------------------------------------------
+| 2 | 09/15/2016 | Add JIT_CODE_UNWINDING_INFO |
+--------------------------------------------------------
+
+
+I/ Introduction
+
+
+This document describes the jitdump file format. The file is generated by Just-In-time compiler runtimes to save meta-data information about the generated code, such as address, size, and name of generated functions, the native code generated, the source line information. The data may then be used by performance tools, such as Linux perf to generate function and assembly level profiles.
+
+The format is not specific to any particular programming language. It can be extended as need be.
+
+The format of the file is binary. It is self-describing in terms of endianness and is portable across multiple processor architectures.
+
+
+II/ Overview of the format
+
+
+The format requires only sequential accesses, i.e., append only mode. The file starts with a fixed size file header describing the version of the specification, the endianness.
+
+The header is followed by a series of records, each starting with a fixed size header describing the type of record and its size. It is, itself, followed by the payload for the record. Records can have a variable size even for a given type.
+
+Each entry in the file is timestamped. All timestamps must use the same clock source. The CLOCK_MONOTONIC clock source is recommended.
+
+
+III/ Jitdump file header format
+
+Each jitdump file starts with a fixed size header containing the following fields in order:
+
+
+* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
+* uint32_t version : a 4-byte value representing the format version. It is currently set to 2
+* uint32_t total_size: size in bytes of file header
+* uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
+* uint32_t pad1 : padding. Reserved for future use
+* uint32_t pid : JIT runtime process identification (OS specific)
+* uint64_t timestamp : timestamp of when the file was created
+* uint64_t flags : a bitmask of flags
+
+The flags currently defined are as follows:
+ * bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP : set if the jitdump file is using an architecture-specific timestamp clock source. For instance, on x86, one could use TSC directly
+
+IV/ Record header
+
+The file header is immediately followed by records. Each record starts with a fixed size header describing the record that follows.
+
+The record header is specified in order as follows:
+* uint32_t id : a value identifying the record type (see below)
+* uint32_t total_size: the size in bytes of the record including the header.
+* uint64_t timestamp : a timestamp of when the record was created.
+
+The following record types are defined:
+ * Value 0 : JIT_CODE_LOAD : record describing a jitted function
+ * Value 1 : JIT_CODE_MOVE : record describing an already jitted function which is moved
+ * Value 2 : JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
+ * Value 3 : JIT_CODE_CLOSE : record marking the end of the jit runtime (optional)
+ * Value 4 : JIT_CODE_UNWINDING_INFO: record describing a function unwinding information
+
+ The payload of the record must immediately follow the record header without padding.
+
+V/ JIT_CODE_LOAD record
+
+
+ The record has the following fields following the fixed-size record header in order:
+ * uint32_t pid: OS process id of the runtime generating the jitted code
+ * uint32_t tid: OS thread identification of the runtime thread generating the jitted code
+ * uint64_t vma: virtual address of jitted code start
+ * uint64_t code_addr: code start address for the jitted code. By default vma = code_addr
+ * uint64_t code_size: size in bytes of the generated jitted code
+ * uint64_t code_index: unique identifier for the jitted code (see below)
+ * char[n]: function name in ASCII including the null termination
+ * native code: raw byte encoding of the jitted code
+
+ The record header total_size field is inclusive of all components:
+ * record header
+ * fixed-sized fields
+ * function name string, including termination
+ * native code length
+ * record specific variable data (e.g., array of data entries)
+
+The code_index is used to uniquely identify each jitted function. The index can be a monotonically increasing 64-bit value. Each time a function is jitted it gets a new number. This value is used in case the code for a function is moved and avoids having to issue another JIT_CODE_LOAD record.
+
+The format supports empty functions with no native code.
+
+
+VI/ JIT_CODE_MOVE record
+
+ The record type is optional.
+
+ The record has the following fields following the fixed-size record header in order:
+ * uint32_t pid : OS process id of the runtime generating the jitted code
+ * uint32_t tid : OS thread identification of the runtime thread generating the jitted code
+ * uint64_t vma : new virtual address of jitted code start
+ * uint64_t old_code_addr: previous code address for the same function
+ * uint64_t new_code_addr: alternate new code started address for the jitted code. By default it should be equal to the vma address.
+ * uint64_t code_size : size in bytes of the jitted code
+ * uint64_t code_index : index referring to the JIT_CODE_LOAD code_index record of when the function was initially jitted
+
+
+The MOVE record can be used in case an already jitted function is simply moved by the runtime inside the code cache.
+
+The JIT_CODE_MOVE record cannot come before the JIT_CODE_LOAD record for the same function name. The function cannot have changed name, otherwise a new JIT_CODE_LOAD record must be emitted.
+
+The code size of the function cannot change.
+
+
+VII/ JIT_DEBUG_INFO record
+
+The record type is optional.
+
+The record contains source lines debug information, i.e., a way to map a code address back to a source line. This information may be used by the performance tool.
+
+The record has the following fields following the fixed-size record header in order:
+ * uint64_t code_addr: address of function for which the debug information is generated
+ * uint64_t nr_entry : number of debug entries for the function
+ * debug_entry[n]: array of nr_entry debug entries for the function
+
+The debug_entry describes the source line information. It is defined as follows in order:
+* uint64_t code_addr: address of function for which the debug information is generated
+* uint32_t line : source file line number (starting at 1)
+* uint32_t discrim : column discriminator, 0 is default
+* char name[n] : source file name in ASCII, including null termination
+
+The debug_entry entries are saved in sequence but given that they have variable sizes due to the file name string, they cannot be indexed directly.
+They need to be walked sequentially. The next debug_entry is found at sizeof(debug_entry) + strlen(name) + 1.
+
+IMPORTANT:
+ The JIT_CODE_DEBUG for a given function must always be generated BEFORE the JIT_CODE_LOAD for the function. This facilitates greatly the parser for the jitdump file.
+
+
+VIII/ JIT_CODE_CLOSE record
+
+
+The record type is optional.
+
+The record is used as a marker for the end of the jitted runtime. It can be replaced by the end of the file.
+
+The JIT_CODE_CLOSE record does not have any specific fields, the record header contains all the information needed.
+
+
+IX/ JIT_CODE_UNWINDING_INFO
+
+
+The record type is optional.
+
+The record is used to describe the unwinding information for a jitted function.
+
+The record has the following fields following the fixed-size record header in order:
+
+uint64_t unwind_data_size : the size in bytes of the unwinding data table at the end of the record
+uint64_t eh_frame_hdr_size : the size in bytes of the DWARF EH Frame Header at the start of the unwinding data table at the end of the record
+uint64_t mapped_size : the size of the unwinding data mapped in memory
+const char unwinding_data[n]: an array of unwinding data, consisting of the EH Frame Header, followed by the actual EH Frame
+
+
+The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
+
+
+The EH Frame follows the LSB specicfication as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
+
+
+NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.
--
1.9.1
Stephane Eranian
2016-10-13 11:03:04 UTC
Permalink
From: Stefano Sanfilippo <***@chromium.org>

This record is intended to provide unwinding information in the
eh_frame format. This is required to unwind JITed code which
does not maintain the frame pointer register during function calls.

The eh_frame unwinding information can be emitted by V8 / Chromium
when the --perf_prof_unwinding_info is passed.

A record of type jr_code_unwinding_info comes before the jr_code_load
it referred to and contains both the .eh_frame and .eh_frame_hdr.

The fields in the header have the following meaning:

* unwinding_size: size of the eh_frame and eh_frame_hdr, necessary
for distinguishing the content from the padding.

* eh_frame_hdr_size: as the name says.

* mapped_size: size of the payload that was in memory at runtime.
typically unwinding_size if the .eh_frame_hdr and .eh_frame were
mapped, or 0 if they weren't. It should always be the former case,
since the .eh_frame is guaranteed to be mapped in memory. However,
certain JITs might want to inject an .eh_frame_hdr with an empty LUT
to trigger fp-based unwinding fallback in libunwind. The only part
of the .eh_frame_hdr that libunwind reads from remote memory is the
LUT, and since there is none, mapping the unwinding info in memory
is not necessary, and 0 in this field signifies that it wasn't.
This practical hack allows to save bytes in code memory for those
JIT compilers that might or might not maintain a valid frame pointer.

The payload that follows is assumed to contain first the .eh_frame and
then the .eh_header_hdr, with no padding between the two.

Signed-off-by: Stefano Sanfilippo <***@chromium.org>
Signed-off-by: Ross McIlroy <***@chromium.org>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/util/jitdump.c | 57 ++++++++++++++++++++++++++++++++++++++++++++---
tools/perf/util/jitdump.h | 12 ++++++++++
2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 5db2feb90060..fdddeca26545 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -37,6 +37,10 @@ struct jit_buf_desc {
bool needs_bswap; /* handles cross-endianess */
bool use_arch_timestamp;
void *debug_data;
+ void *unwinding_data;
+ uint64_t unwinding_size;
+ uint64_t unwinding_mapped_size;
+ uint64_t eh_frame_hdr_size;
size_t nr_debug_entries;
uint32_t code_load_count;
u64 bytes_written;
@@ -295,6 +299,13 @@ jit_get_next_entry(struct jit_buf_desc *jd)
}
}
break;
+ case JIT_CODE_UNWINDING_INFO:
+ if (jd->needs_bswap) {
+ jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size);
+ jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size);
+ jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size);
+ }
+ break;
case JIT_CODE_CLOSE:
break;
case JIT_CODE_LOAD:
@@ -370,7 +381,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
u16 idr_size;
const char *sym;
uint32_t count;
- int ret, csize;
+ int ret, csize, usize;
pid_t pid, tid;
struct {
u32 pid, tid;
@@ -380,6 +391,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
pid = jr->load.pid;
tid = jr->load.tid;
csize = jr->load.code_size;
+ usize = jd->unwinding_mapped_size;
addr = jr->load.code_addr;
sym = (void *)((unsigned long)jr + sizeof(jr->load));
code = (unsigned long)jr + jr->load.p.total_size - csize;
@@ -408,6 +420,14 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
jd->nr_debug_entries = 0;
}

+ if (jd->unwinding_data && jd->eh_frame_hdr_size) {
+ free(jd->unwinding_data);
+ jd->unwinding_data = NULL;
+ jd->eh_frame_hdr_size = 0;
+ jd->unwinding_mapped_size = 0;
+ jd->unwinding_size = 0;
+ }
+
if (ret) {
free(event);
return -1;
@@ -422,7 +442,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)

event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
event->mmap2.start = addr;
- event->mmap2.len = csize;
+ event->mmap2.len = usize ? ALIGN_8(csize) + usize : csize;
event->mmap2.pid = pid;
event->mmap2.tid = tid;
event->mmap2.ino = st.st_ino;
@@ -473,6 +493,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
char *filename;
size_t size;
struct stat st;
+ int usize;
u16 idr_size;
int ret;
pid_t pid, tid;
@@ -483,6 +504,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)

pid = jr->move.pid;
tid = jr->move.tid;
+ usize = jd->unwinding_mapped_size;
idr_size = jd->machine->id_hdr_size;

/*
@@ -511,7 +533,8 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
(sizeof(event->mmap2.filename) - size) + idr_size);
event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
event->mmap2.start = jr->move.new_code_addr;
- event->mmap2.len = jr->move.code_size;
+ event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize
+ : jr->move.code_size;
event->mmap2.pid = pid;
event->mmap2.tid = tid;
event->mmap2.ino = st.st_ino;
@@ -578,6 +601,31 @@ static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
}

static int
+jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ void *unwinding_data;
+ uint32_t unwinding_data_size;
+
+ if (!(jd && jr))
+ return -1;
+
+ unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding);
+ unwinding_data = malloc(unwinding_data_size);
+ if (!unwinding_data)
+ return -1;
+
+ memcpy(unwinding_data, &jr->unwinding.unwinding_data,
+ unwinding_data_size);
+
+ jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size;
+ jd->unwinding_size = jr->unwinding.unwinding_size;
+ jd->unwinding_mapped_size = jr->unwinding.mapped_size;
+ jd->unwinding_data = unwinding_data;
+
+ return 0;
+}
+
+static int
jit_process_dump(struct jit_buf_desc *jd)
{
union jr_entry *jr;
@@ -594,6 +642,9 @@ jit_process_dump(struct jit_buf_desc *jd)
case JIT_CODE_DEBUG_INFO:
ret = jit_repipe_debug_info(jd, jr);
break;
+ case JIT_CODE_UNWINDING_INFO:
+ ret = jit_repipe_unwinding_info(jd, jr);
+ break;
default:
ret = 0;
continue;
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index bcacd20d0c1c..c6b9b67f43bf 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -19,6 +19,7 @@
#define JITHEADER_MAGIC_SW 0x4454694A

#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+#define ALIGN_8(x) (((x) + 7) & (~7))

#define JITHEADER_VERSION 1

@@ -48,6 +49,7 @@ enum jit_record_type {
JIT_CODE_MOVE = 1,
JIT_CODE_DEBUG_INFO = 2,
JIT_CODE_CLOSE = 3,
+ JIT_CODE_UNWINDING_INFO = 4,

JIT_CODE_MAX,
};
@@ -101,12 +103,22 @@ struct jr_code_debug_info {
struct debug_entry entries[0];
};

+struct jr_code_unwinding_info {
+ struct jr_prefix p;
+
+ uint64_t unwinding_size;
+ uint64_t eh_frame_hdr_size;
+ uint64_t mapped_size;
+ const char unwinding_data[0];
+};
+
union jr_entry {
struct jr_code_debug_info info;
struct jr_code_close close;
struct jr_code_load load;
struct jr_code_move move;
struct jr_prefix prefix;
+ struct jr_code_unwinding_info unwinding;
};

static inline struct debug_entry *
--
1.9.1
Stephane Eranian
2016-10-13 11:03:13 UTC
Permalink
From: Stefano Sanfilippo <***@chromium.org>

When the jit_buf_desc contains unwinding information, it is emitted
as eh_frame unwinding sections in the DSOs generated by perf inject.

The unwinding information is required to unwind of JITed code which
do not maintain the frame pointer register during function calls.
It can be emitted by V8 / Chromium when the --perf_prof_unwinding_info
is passed to V8.

The eh_frame and eh_frame_hdr sections are emitted immediately after
the .text.

The .eh_frame is aligned at a 8-byte boundary, and .eh_frame_hdr
at a 4-byte one. Since size of the .eh_frame is required to be a
multiple of the word size, which means there will never be additional
padding between it and the .eh_frame_hdr on machines where the word
size is 4 or 8 bytes.

However, additional padding might be inserted between .text and
eh_frame to reach the correct alignment, which will always be
8 bytes, also on 32bit machines. The reasoning behind this choice is
that 4 extra bytes of padding worst case are not a large cost for the
advantage of removing word-size dependent offset calculations when
emitting the jitdump.

Signed-off-by: Stefano Sanfilippo <***@chromium.org>
Signed-off-by: Ross McIlroy <***@chromium.org>
Reviewed-by: Stephane Eranian <***@google.com>
---
tools/perf/util/genelf.c | 102 ++++++++++++++++++++++++++++++++++++++++++++--
tools/perf/util/genelf.h | 3 +-
tools/perf/util/jitdump.c | 11 +++--
3 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index 14a73acc549c..57845109e662 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -69,6 +69,8 @@ static char shd_string_table[] = {
'.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
'.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
'.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */
};

static struct buildid_note {
@@ -149,6 +151,86 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod
}
#endif

+static int
+jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
+ uint64_t unwinding_size, uint64_t base_offset)
+{
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Shdr *shdr;
+ uint64_t unwinding_table_size = unwinding_size - unwinding_header_size;
+
+ /*
+ * setup eh_frame section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 8;
+ d->d_off = 0LL;
+ d->d_buf = unwinding;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_table_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 104;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup eh_frame_hdr section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 4;
+ d->d_off = 0LL;
+ d->d_buf = unwinding + unwinding_table_size;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_header_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 90;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset + unwinding_table_size;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ return 0;
+}
+
/*
* fd: file descriptor open for writing for the output file
* load_addr: code load address (could be zero, just used for buildid)
@@ -159,13 +241,15 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod
int
jit_write_elf(int fd, uint64_t load_addr, const char *sym,
const void *code, int csize,
- void *debug __maybe_unused, int nr_debug_entries __maybe_unused)
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
{
Elf *e;
Elf_Data *d;
Elf_Scn *scn;
Elf_Ehdr *ehdr;
Elf_Shdr *shdr;
+ uint64_t eh_frame_base_offset;
char *strsym = NULL;
int symlen;
int retval = -1;
@@ -196,7 +280,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
ehdr->e_type = ET_DYN;
ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
ehdr->e_version = EV_CURRENT;
- ehdr->e_shstrndx= 2; /* shdr index for section name */
+ ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */

/*
* setup text section
@@ -233,6 +317,18 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_entsize = 0;

/*
+ * Setup .eh_frame_hdr and .eh_frame
+ */
+ if (unwinding) {
+ eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize);
+ retval = jit_add_eh_frame_info(e, unwinding,
+ unwinding_header_size, unwinding_size,
+ eh_frame_base_offset);
+ if (retval)
+ goto error;
+ }
+
+ /*
* setup section headers string table
*/
scn = elf_newscn(e);
@@ -300,7 +396,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
shdr->sh_type = SHT_SYMTAB;
shdr->sh_flags = 0;
shdr->sh_entsize = sizeof(Elf_Sym);
- shdr->sh_link = 4; /* index of .strtab section */
+ shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */

/*
* setup symbols string table
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 5c933ac71451..2424bd9862a3 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -3,7 +3,8 @@

/* genelf.c */
int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
- const void *code, int csize, void *debug, int nr_debug_entries);
+ const void *code, int csize, void *debug, int nr_debug_entries,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
#ifdef HAVE_DWARF_SUPPORT
/* genelf_debug.c */
int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index fdddeca26545..c47fba63274b 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -72,7 +72,10 @@ jit_emit_elf(char *filename,
const void *code,
int csize,
void *debug,
- int nr_debug_entries)
+ int nr_debug_entries,
+ void *unwinding,
+ uint32_t unwinding_header_size,
+ uint32_t unwinding_size)
{
int ret, fd;

@@ -85,7 +88,8 @@ jit_emit_elf(char *filename,
return -1;
}

- ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries);
+ ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries,
+ unwinding, unwinding_header_size, unwinding_size);

close(fd);

@@ -412,7 +416,8 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)

size = PERF_ALIGN(size, sizeof(u64));
uaddr = (uintptr_t)code;
- ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries);
+ ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
+ jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);

if (jd->debug_data && jd->nr_debug_entries) {
free(jd->debug_data);
--
1.9.1
Stephane Eranian
2016-10-13 11:03:25 UTC
Permalink
This patch removes all the string padding generated in
the jitdump file. There are nonecessary and were adding
unecessary complexity. Modern processors can handle unaligned
accesses quite well. The perf.data/jitdump file are always
postp-processed, no need to add extra complexity for no real
gain.

Signed-off-by: Stephane Eranian <***@google.com>
---
tools/perf/jvmti/jvmti_agent.c | 38 +-------------------------------------
1 file changed, 1 insertion(+), 37 deletions(-)

diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 55daefff0d54..e9651a9d670e 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -44,11 +44,6 @@
static char jit_path[PATH_MAX];
static void *marker_addr;

-/*
- * padding buffer
- */
-static const char pad_bytes[7];
-
static inline pid_t gettid(void)
{
return (pid_t)syscall(__NR_gettid);
@@ -230,7 +225,6 @@ init_arch_timestamp(void)

void *jvmti_open(void)
{
- int pad_cnt;
char dump_path[PATH_MAX];
struct jitheader header;
int fd;
@@ -288,10 +282,6 @@ void *jvmti_open(void)
header.total_size = sizeof(header);
header.pid = getpid();

- /* calculate amount of padding '\0' */
- pad_cnt = PADDING_8ALIGNED(header.total_size);
- header.total_size += pad_cnt;
-
header.timestamp = perf_get_timestamp();

if (use_arch_timestamp)
@@ -301,13 +291,6 @@ void *jvmti_open(void)
warn("jvmti: cannot write dumpfile header");
goto error;
}
-
- /* write padding '\0' if necessary */
- if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) {
- warn("jvmti: cannot write dumpfile header padding");
- goto error;
- }
-
return fp;
error:
fclose(fp);
@@ -349,7 +332,6 @@ jvmti_write_code(void *agent, char const *sym,
static int code_generation = 1;
struct jr_code_load rec;
size_t sym_len;
- size_t padding_count;
FILE *fp = agent;
int ret = -1;

@@ -366,8 +348,6 @@ jvmti_write_code(void *agent, char const *sym,

rec.p.id = JIT_CODE_LOAD;
rec.p.total_size = sizeof(rec) + sym_len;
- padding_count = PADDING_8ALIGNED(rec.p.total_size);
- rec.p. total_size += padding_count;
rec.p.timestamp = perf_get_timestamp();

rec.code_size = size;
@@ -393,9 +373,6 @@ jvmti_write_code(void *agent, char const *sym,
ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
fwrite_unlocked(sym, sym_len, 1, fp);

- if (padding_count)
- fwrite_unlocked(pad_bytes, padding_count, 1, fp);
-
if (code)
fwrite_unlocked(code, size, 1, fp);

@@ -412,7 +389,6 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
{
struct jr_code_debug_info rec;
size_t sret, len, size, flen;
- size_t padding_count;
uint64_t addr;
const char *fn = file;
FILE *fp = agent;
@@ -443,16 +419,10 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
* int : line number
* int : column discriminator
* file[] : source file name
- * padding : pad to multiple of 8 bytes
*/
size += nr_lines * sizeof(struct debug_entry);
size += flen * nr_lines;
- /*
- * pad to 8 bytes
- */
- padding_count = PADDING_8ALIGNED(size);
-
- rec.p.total_size = size + padding_count;
+ rec.p.total_size = size;

/*
* If JVM is multi-threaded, nultiple concurrent calls to agent
@@ -486,12 +456,6 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
if (sret != 1)
goto error;
}
- if (padding_count) {
- sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp);
- if (sret != 1)
- goto error;
- }
-
funlockfile(fp);
return 0;
error:
--
1.9.1
Loading...