Compare commits

...

264 Commits
v1.2 ... jstack

Author SHA1 Message Date
Andrey Pangin
a20d771635 Get asynchronous thread dump (jstack) 2021-01-30 01:55:07 +03:00
Andrey Pangin
3a44bb6ba6 Merge branch 'master' into v2.0 2021-01-29 02:05:55 +03:00
Andrey Pangin
f73ac36c9c Fixed symbol resolution when return address points beyond the function 2021-01-29 02:05:26 +03:00
Andrey Pangin
c94b1685cf Merge branch 'master' into v2.0 2021-01-29 01:33:50 +03:00
Andrey Pangin
90d4420d3f Make all symbols private by default for better compiler optimization 2021-01-29 01:32:37 +03:00
Andrey Pangin
a96501a26a Enable native stacks for non-signal events, e.g. lock profiling 2021-01-29 00:09:15 +03:00
Andrey Pangin
39f84be219 Write profiler settings in JFR 2021-01-27 00:12:29 +03:00
Andrey Pangin
4af327e2c1 Write JVM info, system properties, and profiler version in JFR 2021-01-26 02:43:46 +03:00
Andrey Pangin
61919df2ff Make symbols private by default to improve gcc optimizations 2021-01-26 02:42:43 +03:00
Andrey Pangin
26880ecb22 #93: Basic POM for publishing async-profiler Java API to Maven Central 2021-01-14 05:05:10 +03:00
Ivan Zemlyanskiy
af02f6b0fb Migrate documentation from README.md to Wiki (#383) 2021-01-14 00:32:31 +03:00
Andrey Pangin
c11d4ca487 Add OS and CPU information in JFR output 2021-01-12 05:07:02 +03:00
Andrey Pangin
b2dfe9b5b0 Fixed compilation on JDK 7 2021-01-10 20:51:02 +03:00
Andrey Pangin
5585a77355 Merged master->v2.0 2021-01-10 20:46:59 +03:00
Andrey Pangin
b5a67c2b95 Release 1.8.3 2021-01-06 17:53:37 +03:00
Andrey Pangin
9aea04a56a New safemode=32 for sanity check of top Java frames 2021-01-06 17:44:34 +03:00
Andrey Pangin
a48f77b380 #377: Fix JvmtiEnv::GetStackTrace problem after RedefineClasses 2020-12-24 03:05:01 +03:00
Andrey Pangin
8c5f6c1357 Gracefully stop profiler when terminating JVM 2020-12-24 02:58:38 +03:00
Andrey Pangin
88730d4388 Fixed possible deadlock on non-HotSpot JVMs 2020-12-24 02:57:56 +03:00
Andrey Pangin
d132777a60 #378: Create libasyncProfiler.dylib symlink on macOS 2020-12-10 01:12:40 +03:00
Andrey Pangin
04dac10d41 JFRv2 parser. Added JFR->FlameGraph converter; fixed FlameScope converter 2020-12-06 21:20:41 +03:00
Andrey Pangin
5290b81190 Attempt to recover stack trace from String.indexOf intrinsic 2020-11-15 23:38:42 +03:00
Andrey Pangin
93e1f963ef Links to v2.0 Early access 2020-11-09 04:51:48 +03:00
Andrey Pangin
a18af69f8b Minor build fixes 2020-11-09 04:50:02 +03:00
Andrey Pangin
60cac04c24 2.0-b1 Early Access 2020-11-09 04:33:35 +03:00
Andrey Pangin
3d7e8efd3b Changelog 2020-11-09 04:15:22 +03:00
Andrey Pangin
d26d69e550 Returned tree output format 2020-11-09 04:14:15 +03:00
Andrey Pangin
8160e49c14 Dump flat profile in text format 2020-11-07 04:04:18 +03:00
Andrey Pangin
731ac31064 Bias JFR buffers to threads. Distinguish TLAB/outside allocations in Flame Graph 2020-11-07 00:21:30 +03:00
Andrey Pangin
013ceee55d Resurrected FlameGraph and collapsed output formats 2020-11-05 04:54:19 +03:00
Andrey Pangin
f7ef0e97b2 Clean-room FlameGraph implementation. Removed 3rd party copyrighted code. No more CDDL license 2020-11-04 20:47:43 +03:00
Andrey Pangin
c01fe588ce Merge branch 'master' into v2.0 2020-11-04 18:32:58 +03:00
Andrey Pangin
e498ad27d2 Improved HTML FlameGraph performance 2020-11-04 04:29:36 +03:00
Andrey Pangin
edb31a0f79 Merge branch 'master' into v2.0
# Conflicts:
#	CHANGELOG.md
#	src/allocTracer.cpp
#	src/allocTracer.h
2020-11-02 03:54:29 +03:00
Andrey Pangin
13394b7125 Release 1.8.2 2020-11-02 02:54:32 +03:00
Andrey Pangin
d227a83e42 Fixed warnings on JDK 15 and 16 2020-11-02 02:16:41 +03:00
Andrey Pangin
7e8ad02ccb Fixed allocation sizes on JDK 8u262+ 2020-11-02 00:40:47 +03:00
Andrey Pangin
450f251732 Support 32-bit systems 2020-11-01 04:40:25 +03:00
Andrey Pangin
53ca190457 Merge branch 'master' into v2.0 2020-11-01 02:21:16 +03:00
Andrey Pangin
683144a907 Release 1.8.2 2020-11-01 00:58:29 +03:00
Andrey Pangin
02b65627cd Merge branch 'master' into v2.0
# Conflicts:
#	src/flightRecorder.cpp
#	src/profiler.h
2020-10-31 23:09:24 +03:00
Andrey Pangin
48e4fd5035 #363: New native libraries are not tracked in JDK 15 2020-10-31 22:46:38 +03:00
Andrey Pangin
642a1ac7fb Timers for macOS and Linux; jdk.CPULoad event 2020-10-31 02:54:12 +03:00
Andrey Pangin
114e711fd6 jdk.ActiveRecording event 2020-10-30 03:59:41 +03:00
Andrey Pangin
f833f41b46 jdk.CPULoad event 2020-10-30 03:10:21 +03:00
Andrey Pangin
a82163b703 Line numbers in JFR output 2020-10-26 01:15:58 +03:00
Andrey Pangin
6b49cfa9be JFRv2 2020-10-21 03:41:12 +03:00
Andrey Pangin
6c26e5ae69 Added link to AArch64 build 2020-10-18 17:37:44 +03:00
Andrey Pangin
1634380a16 Added tag for aarch64 release 2020-10-18 04:04:52 +03:00
Andrey Pangin
1a6e582ad7 #356: 'resume' command continues writing JFR instead of creating a new file 2020-10-17 23:44:27 +03:00
Andrey Pangin
4b5a17b336 #350: More careful native stack walking in wall-clock mode 2020-09-22 03:05:27 +03:00
Andrey Pangin
8392e568f4 #351: Updated README instructions to check libjvm debug symbols 2020-09-22 00:43:01 +03:00
Andrey Pangin
d7d56c762b Merge branch 'master' into v2.0
# Conflicts:
#	src/instrument.cpp
#	src/profiler.h
2020-09-14 21:54:47 +03:00
Andrey Pangin
a4c6d42677 Release 1.8.1 2020-09-05 01:02:19 +03:00
Andrey Pangin
b7e907884b Fixed profiler check command after #347 2020-09-04 04:26:05 +03:00
Andrey Pangin
5b69492dba #347: Do not read /proc/kallsyms when --all-user is specified 2020-09-04 02:14:59 +03:00
James Yuzawa
5a789bda42 Clean up debug symbols instructions and troubleshooting (#345) 2020-09-03 00:31:05 +03:00
James Yuzawa
a010f387b3 Specify JVM process by 'jps' application name (#346) 2020-09-03 00:17:46 +03:00
Andrey Pangin
61d5cdcd68 Profile multiple events from the command line 2020-08-30 03:02:43 +03:00
Andrey Pangin
2b14ee69ef #340: UnsatisfiedLinkError when attaching method profiler 2020-08-12 23:02:26 +03:00
Andrey Pangin
048b54621d Redo fix for #328 2020-08-12 23:02:05 +03:00
Andrey Pangin
94d406c531 Revert README 2020-08-11 22:37:10 +03:00
Andrey Pangin
800580bb30 Release 1.8 2020-08-10 23:18:20 +03:00
Andrey Pangin
8cecd2df9b #339: Report invalid interval argument 2020-08-09 18:50:59 +03:00
Andrey Pangin
d86883043a #330: Release packages should be extracted into separate folder 2020-08-09 18:35:26 +03:00
Andrey Pangin
d0772ba62c Added collapsed->FlameGraph converter 2020-08-09 16:06:13 +03:00
Andrey Pangin
d6d4a3c2a3 FlameGraph: skip empty lines 2020-08-06 23:07:28 +03:00
Andrey Pangin
49f9050bf5 FlameGraph minor cosmetic changes 2020-08-06 01:55:47 +03:00
Andrey Pangin
67b77b9645 Improved FlameGraph converter 2020-08-05 02:04:19 +03:00
Andrey Pangin
971fc85d1c FlameGraph 2.0 on HTML5 canvas 2020-08-04 23:26:04 +03:00
Andrey Pangin
50b9fe4d85 Merged with master 2020-08-04 19:52:27 +03:00
Jason Zaugg
f9db1099f9 Register natives one-by-one to support partial Java API implementations (#337) 2020-08-04 19:05:08 +03:00
Andrey Pangin
adce201837 #335: Fixed unsafe thread local storage access 2020-07-29 23:26:39 +03:00
Andrey Pangin
a905d50e00 #328, #14: Fixed long attach time and slow class loading 2020-07-29 22:20:45 +03:00
Andrey Pangin
f006e00443 async-profiler 2.0: Record cpu+alloc+lock in a single JFR. No framebuffer/hashtable limits. 2020-07-27 01:35:07 +03:00
Andrey Pangin
5ef449c2ed #335: Do not restart poll() calls with finite timeout 2020-07-20 00:14:48 +03:00
Andrey Pangin
d9ca3e42a8 #329: Support both ARM and THUMB flavors of JDK binaries 2020-06-19 02:37:19 +03:00
Claus F. Strasburger
269bef2867 profiler.sh: work on minimal systems (#303) 2020-06-15 00:28:30 +03:00
Andrey Pangin
e62cb2cfd1 #327: Per-thread reverse flamegraph / call tree 2020-05-31 08:34:13 +03:00
Andrey Pangin
7135840f70 Compilation fix 2020-05-25 01:32:53 +03:00
Andrey Pangin
31ddc2f562 #248: Converter to the format supported by FlameScope 2020-05-25 00:33:42 +03:00
Sergei Egorov
a5beee66ff Update the Java Agent options link to the latest release (#322) 2020-05-17 23:35:36 +03:00
Andrey Pangin
c15439348f javadoc comment 2020-05-17 15:15:36 +03:00
Andrey Pangin
17fe36e43e Release 1.7.1 2020-05-14 03:53:03 +03:00
Andrey Pangin
5312a793ec 'safemode' option to disable stack recovery techniques 2020-05-14 02:34:30 +03:00
Kirill Timofeev
4d43db91e1 Ensure code blob exists properly (#316)
Check that code blob is not removed to avoid returning NULL
when another code blob loaded at similar address range
that was used by removed one

Co-authored-by: Simon Ogorodnik <Simon.Ogorodnik@jetbrains.com>
2020-05-13 21:49:29 +03:00
Andrey Pangin
0020af54a3 LBR call stack support (--cstack lbr) 2020-04-21 23:47:53 +03:00
Andrey Pangin
f67d392ad8 Synthesize symbol names for PLT entries 2020-04-21 22:39:13 +03:00
Andrei Pangin
ff70da1736 Added --filter <thread-ids> for wall-clock profiling mode (#315) 2020-04-15 00:27:56 +03:00
Andrey ``Bass'' Shcheglov
07438daa70 Clean up the Makefile (#314) 2020-04-15 00:26:01 +03:00
Andrey Pangin
e1e8aa068a #310: Fixed crash on Zing 2020-03-27 18:20:12 +03:00
Andrey Pangin
a2691f919e #309: README paragraph about missed output file 2020-03-19 18:25:08 +03:00
Andrey Pangin
f496a167fe Release 1.7 2020-03-17 23:29:44 +03:00
Andrey Pangin
d9a1252550 JDK 14 compatibility: late load of libjava.so 2020-03-17 22:58:59 +03:00
Andrey Pangin
f3ca611267 Flush profiler output on 'version' command 2020-03-16 00:33:09 +03:00
Andrey Pangin
119da0fcb2 Replace unsafe calls to JVMTI GetStackTrace with manually patched AsyncGetCallTrace 2020-02-29 01:54:51 +03:00
Andrey Pangin
6bb7f749c9 Fixed build 2020-02-24 21:41:54 +03:00
Andrey Pangin
9593745098 #187: Filter stack traces by the given name pattern 2020-02-24 19:54:49 +03:00
Felix Barnsteiner
e891ecd9da Add NOTICE with CDDL license header (#301) 2020-02-19 20:20:23 +03:00
Andrey Pangin
b8493976b6 Version 1.7-ea3 2020-02-17 00:07:11 +03:00
Andrey Pangin
54b85dc718 'check' command to test if the specified event is available without starting profiler 2020-02-16 23:26:51 +03:00
Andrey Pangin
675a28fdc2 #300: Method invocation profiling by pattern: '-e java.io.File.*' 2020-02-16 20:52:53 +03:00
Andrey Pangin
1a4437999b Use different signals for cpu/wall/alloc to allow profiling multiple events at the same time 2020-02-16 20:00:59 +03:00
Andrey Pangin
ee2438e25f #277: Removed AsyncProfiler.getNativeThreadId() 2020-02-16 19:00:34 +03:00
Andrey Pangin
cd062fead9 Allow mangled function names, e.g. -e VMThread::execute 2020-02-16 18:11:02 +03:00
Andrey Pangin
156389f11a #133: An option to exclude native stack frames 2020-02-16 04:13:23 +03:00
Andrey Pangin
8373224395 #293: Allow shading of AsyncProfiler API (automatic discovery of the class with native methods) 2020-02-16 02:16:23 +03:00
Andrey Pangin
fe6c4ddeda Version 1.7-ea2 2020-02-13 03:15:28 +03:00
Andrey Pangin
98ac0c58d6 #281: Note about aqua/brown frames in an allocation Flame Graph 2020-02-13 02:35:45 +03:00
Andrey Pangin
b57106b858 Reduce the number of native methods in Java API 2020-02-13 02:16:52 +03:00
Andrey Pangin
c204e28348 #277: Record Java Thread ID in JFR output 2020-02-12 04:43:27 +03:00
Andrey Pangin
fc17386ec0 Display native thread names 2020-02-12 03:40:01 +03:00
Andrey Pangin
49d86abc6c #252: Record thread names in JFR output 2020-02-12 02:35:55 +03:00
Andrey Pangin
9fc97fc681 #279, #287, #296: Wall clock profiler improvements:
- stable interval
 - thread states (runnable vs. sleeping)
 - Java API to update set of monitored threads
2020-02-11 03:26:19 +03:00
Andrey Pangin
9b24fdef99 #290: Remove problematic optimization 2020-01-31 03:53:12 +03:00
Andrey Pangin
e37059d409 Version 1.7-ea 2020-01-27 01:49:06 +03:00
Andrey Pangin
869058b56b #295: Workaround for JDK 11 bug in ServerSocket.accept() 2020-01-27 01:28:47 +03:00
Andrey Pangin
11b0f4598e Correct stack traces when executing VM runtime code 2020-01-24 03:26:26 +03:00
Andrey Pangin
cc9cee7bec Fixed instrumentation of anonymous classes 2020-01-23 03:01:02 +03:00
Andrey Pangin
5ae46d2312 #290: CodeCache for compiled methods and runtime stubs must be handled separately 2020-01-21 03:34:52 +03:00
Andrey Pangin
776b5597bf #286: Enable CPU profiling on WSL 2020-01-07 20:38:00 +03:00
Andrey Pangin
e282b76880 #286: Add an error message if ITIMER_PROF is not supported 2020-01-07 19:14:18 +03:00
Andrey Pangin
e47d7f408f #277: getNativeThreadId() now returns tid of the current Thread 2020-01-07 18:36:19 +03:00
KUBOTA Yuji
5044869ecd Make release version of javac configurable (#285) 2020-01-06 18:59:58 +03:00
Andrey Pangin
9516f54311 Better C stack in allocation profiling mode 2020-01-06 05:17:56 +03:00
Andrey Pangin
9bd414411f --cstack option to collect native stack traces for Java-level events 2020-01-06 04:36:35 +03:00
Andrei Pangin
0334c5900e Profile invocations of an arbitrary Java method
I.e. record all stack traces where the method is called.
2020-01-06 03:20:31 +03:00
Andrey Pangin
f502979135 Enable allocation profiling on Zing 2019-12-23 00:34:04 +03:00
Andrey Pangin
d89fc3fbdc Java thread <-> VMThread bridge 2019-12-22 23:51:23 +03:00
Andrey Pangin
9279531cf8 #277: Java API for getting native thread ID 2019-12-22 21:59:26 +03:00
Per Lundberg
9de1a63542 README.md: minor grammar fixes (#283) 2019-12-18 14:07:25 +03:00
Andrey Pangin
21af257716 #271: Reduce the amount of unknown_Java even more 2019-12-17 01:49:06 +03:00
Andrey Pangin
28ed6f490e Intercept Thread.setNativeName 2019-12-17 01:34:57 +03:00
Andrey Pangin
dc4f01dd14 Removed syncwalk argument (no longer needed) 2019-12-13 03:57:58 +03:00
Andrey Pangin
1e3a4b77ee #271: Further reduce the amount of unknown/not_walkable frames 2019-12-13 03:52:00 +03:00
Andrey Pangin
4cec7a3bb0 #271: Reduce the amount of unknown_Java frames 2019-12-12 02:45:51 +03:00
tomgoren
2557363892 Update README with elevated permissions syntax updates and extra Docker runtime flag (#270) 2019-11-28 01:36:10 +03:00
Andrey Pangin
78035134f4 #263: Replace non-printable characters in function names 2019-11-23 18:22:18 +03:00
Andrey Pangin
0ef1122a3b #266: Fix [unknown] frames due to kptr_restrict 2019-11-17 05:36:47 +03:00
RoySunnySean007
8bb57de1d1 #265: Update README.md about installing debuginfo package 2019-11-14 13:09:59 +03:00
Andrey Pangin
11d74b73af #262: Fixed NativeLibrary.load0 signature on JDK 9+ (NoSuchMethodError) 2019-10-28 02:02:55 +03:00
Andrei Pangin
a759960bb0 Removed the note about reducing TLAB size
Reducing TLAB size in production is discouraged.
2019-10-26 04:29:26 +03:00
Andrey Pangin
7edcd2660a #256: Fixed crash on Zing JVM 2019-10-17 04:51:37 +03:00
Andrey Pangin
a97a5cae13 #255: Truncate too long signatures 2019-10-15 02:47:13 +03:00
Dmitry Timofeev
d2e7e2718c Document the placeholders in the file name (#254) 2019-10-12 18:51:08 +03:00
Andrey Pangin
e1c3100c60 Fixed lock profiling on some macOS JDK builds 2019-10-04 19:29:47 +03:00
Andrey Pangin
93c63d50d5 --sync-walk option to use alternative stack walker in expert mode 2019-09-30 21:04:42 +03:00
Andrey Pangin
7e6db636d8 #250: Print error message when failed to parse symbols due to the OS bug 2019-09-29 22:04:57 +03:00
Andrey Pangin
78a83a31b2 #250: Fixed mmap bug when parsing symbols 2019-09-28 12:07:39 +03:00
Andrey Pangin
adcf89234b Release 1.6 2019-09-09 16:52:39 +03:00
Andrey Pangin
b7e9e6b955 #192: Pause/resume profiling 2019-09-07 13:05:39 +03:00
Andrey Pangin
19e16dc973 #211: The agent autodetects output format by the filename 2019-09-03 03:38:20 +03:00
Andrey Pangin
84602f8660 Added download links to 1.6-ea 2019-09-02 03:28:29 +03:00
Andrey Pangin
f5850e6f3b 1.6-ea release 2019-09-02 02:50:25 +03:00
Andrey Pangin
c14f9a9feb #146: Manage sampling rate of allocation profiling 2019-09-02 02:31:00 +03:00
Andrey Pangin
bbad5d835b #213: An option to print method signatures (-g) 2019-09-02 00:47:37 +03:00
Andrey Pangin
f03fdae8df Updated FlameGraph script 2019-09-01 21:50:17 +03:00
Andrey Pangin
2159c7fd33 #242: [macos] Must not use pthread_mach_thread_np inside signal handler 2019-09-01 15:56:05 +03:00
Andrey Pangin
b66f920422 Reset PerfEvents ring buffer, even when cannot collect the stack trace 2019-08-29 00:30:09 +03:00
Andrey Pangin
45e53b83f9 #208: Expand %p (pid) and %t (timestamp) in the output file names 2019-08-27 03:25:46 +03:00
Andrey Pangin
d265d142e6 Fixed use of stale pointer when copying arguments 2019-08-26 02:49:55 +03:00
Andrey Pangin
5f94f6ee50 #229: Treat -f filename relative to the current shell directory 2019-08-25 20:57:51 +03:00
Andrey Pangin
e7150f1b5e #232: Increase maximum Java stack depth with -j option 2019-08-25 19:50:26 +03:00
Andrey Pangin
e89d41de54 #233: Parse symbols of JNI libraries loaded in runtime.
Improve second start time.
2019-08-25 18:24:11 +03:00
Andrey Pangin
7049adb202 #227: JFR fix 2019-08-11 17:21:47 +03:00
Andrey Pangin
99926e5c74 Java API execute() method may spoil JFR output 2019-07-16 23:50:15 +03:00
Andrey Pangin
37777104fd #220: 'profiler.sh status' shows wrong event 2019-05-25 22:28:28 +03:00
Andrey Pangin
a518d93ec8 Updated README to reflect #212, #218 2019-05-20 17:15:01 +03:00
Andrey Pangin
936a9fea8d #197: Can't get cache-misses on AArch64 2019-05-20 03:18:03 +03:00
Andrey Pangin
47c576e552 Fixed corner cases with no JVM debug symbols 2019-05-20 03:17:42 +03:00
Andrey Pangin
df05305642 #219: Fix JDK 11 macOS crash when profiling native thread 2019-05-20 02:42:38 +03:00
Janusz Dziemidowicz
d7a7a04684 Add Java 12 memory profiling support (#217) 2019-05-08 00:21:08 +03:00
Andrey Pangin
9c04d76392 #123, #163, #202: Include all AsyncGetCallTrace failures in the profile 2019-02-18 01:20:37 +03:00
Andrey Pangin
9df5518cd7 #199: Return error when failed to allocate frame buffer 2019-02-17 01:47:48 +03:00
Andrey Pangin
f77f2d1afb #200: Added JATTACH_PATH env variable to override attach socket directory 2019-02-17 01:29:22 +03:00
Andrey Pangin
cef015d25f Fixed race condition when creating perf_events descriptor 2019-01-25 04:10:57 +03:00
Andrey Pangin
861598538a #196: thread-smoke-test should do some real work 2019-01-24 01:54:20 +03:00
Andrey Pangin
34da66f6fd Updated comment about OpenJDK debug symbols 2019-01-22 03:36:43 +03:00
Dmitriy Dumanskiy
cc4126911e Extended troubleshooting section (#194) 2019-01-22 03:16:04 +03:00
Andrey Pangin
2898674a72 1.5 release: Updated download links 2019-01-08 23:30:44 +03:00
Andrey Pangin
52a1e36627 Added Events.WALL in Java API 2019-01-08 21:20:22 +03:00
Andrey Pangin
cfc026c709 1.5 release 2019-01-08 20:33:22 +03:00
Andrey Pangin
d78e0b3a51 #184: Use different engine by default when profiling on macOS 2019-01-08 02:06:05 +03:00
Andrey Pangin
60bcd54e5c Hardware events may not always support zero skid 2019-01-08 01:38:15 +03:00
Andrey Pangin
2829aa7f0a Fixes for Zing runtime 2019-01-07 21:33:45 +03:00
Andrey Pangin
b878a247a0 Allow negative breakpoint offsets 2019-01-06 00:59:27 +03:00
Andrey Pangin
18b23c892b README: profiling in a container. Updated troubleshooting section. 2019-01-05 19:21:52 +03:00
Andrey Pangin
e167d53755 'annotate' option should be false by default 2019-01-05 04:47:56 +03:00
Andrey Pangin
471f8115a8 #183: -a option to annotate method names with type hints 2019-01-05 04:40:48 +03:00
Andrey Pangin
ab0008c2d7 Fixed file modes 2019-01-05 01:42:09 +03:00
Andrey Pangin
02dc90587b #142: Improve thread names support.
Contributed by @KirillTim
2019-01-05 01:21:30 +03:00
Andrey Pangin
a2c70c99ba 'itimer' profiling mode (like 'cpu', but does not require perf_events support) 2018-12-31 01:34:26 +03:00
Andrey Pangin
7397e17e4d Added a paragraph about wall-clock profiling 2018-12-30 17:11:09 +03:00
Andrey Pangin
65ec4b4e19 #180: Crash in StackFrame::pop 2018-12-13 01:32:35 +03:00
Andrei Pangin
314e7220fb #69: Wall-clock profiling 2018-12-06 23:57:19 +03:00
Andrey Pangin
bbd6dd7d93 #177: Small fixes and improvements for --all-user / --all-kernel 2018-12-01 17:57:55 +03:00
jpbempel
4cdd2ed62d #177: Add --all-user and --all-kernel options 2018-12-01 13:23:09 +03:00
Andrey Pangin
f63a0283b0 #168: Fixed crash on CentOS 5 2018-11-16 00:16:23 +03:00
Andrey Pangin
5367472d2b Fixed JFR stack trace order 2018-11-09 00:35:52 +03:00
Andrey Pangin
78fe8d3a8b #167: Fixed compilation on OS X 10.8.5 2018-10-24 19:13:48 +03:00
Kirill Timofeev
50b3fcca7e fix thread names format (#165) 2018-10-14 21:00:21 +02:00
Andrey Pangin
9ca5e1bef5 #162: Reverted absolute path fix which is not compatible with busybox 2018-10-05 18:17:50 +03:00
Andrey Pangin
5b77d5b7a8 Removed redundant calls to abspath() 2018-10-04 01:40:03 +03:00
Ivan Zemlyanskiy
5b29f9a0f3 improve profiler.sh for work with symlink (#161) 2018-10-04 00:33:55 +02:00
SylvainJuge
628345063d use absolute path name for output file (#159) 2018-10-03 17:04:39 +02:00
Andrey Pangin
4201215a8d Cleaned some GCC 8 warnings 2018-09-29 17:28:47 +03:00
Andrey Pangin
202ef4af73 #158: Fixed permission problem accessing /proc/pid/root 2018-09-29 16:11:13 +03:00
Andrey Pangin
71af2046eb #157: Fixed crash in lock profiler on JDK 7 2018-09-27 03:16:56 +03:00
Andrey Pangin
fcf27dfd4f #155: Fixed deadlock in JFR recording caused by JVMTI call 2018-09-27 02:44:06 +03:00
Jaroslaw Odzga
5c56c42662 Add info on DebugNonSafepoints to README (#152) 2018-09-01 17:36:12 +03:00
Andrey Pangin
9ad4106834 Fixed broken thread names with CPU profiling 2018-08-20 00:54:02 +03:00
Andrey Pangin
c1d82605d9 #149: Fixed AGCT crash in 'call_stub' routine 2018-08-15 04:00:48 +03:00
Andrey Pangin
0a276856bc Allow breakpoints on private symbols 2018-08-08 01:54:23 +03:00
Andrey Pangin
7ae2ccdfde #148: Updated jattach version. Removed false warnings. 2018-08-07 02:06:25 +03:00
Andrey Pangin
ae6110c723 #145: Get rid of clock_gettime 2018-07-26 00:12:38 +03:00
Andrey Pangin
506364e24e #147: Attach to processes running under chroot 2018-07-25 00:18:05 +03:00
Andrei Pangin
152bf8c64d [travis] Use older dist to check that project builds with gcc 4.6.x (#144) 2018-07-17 02:07:02 +03:00
Andrei Pangin
2ceb1d1e95 Profile native code on macOS (#137)
* Parse all Mach-O symbols, including Fat binaries
* Get native call trace by walking stack manually
2018-07-03 12:47:02 +03:00
Holger Hoffstätte
bfa14445d5 README: add note about how to build icedtea with symbols on Gentoo (#140)
A brief yet not-completely-obvious note about how to get symbols into the 'icedtea' OpenJDK package build on Gentoo Linux.
2018-07-02 00:48:21 +03:00
Andrey Pangin
9f3f9749d7 Updated jattach 2018-06-29 03:08:01 +03:00
Andrey Pangin
a8acfb8e61 #136: Broken compilation on OS X El Capitan 2018-06-28 20:59:57 +03:00
Andrey Pustovetov
42272015f7 Pass JAVA_HOME variable to test scripts (#135) 2018-06-28 01:11:22 +03:00
Andrei Pangin
9a5f9610a4 Updated download links 2018-06-24 04:59:36 +03:00
Andrey Pangin
f72111cd1e Release 1.4 2018-06-24 04:42:49 +03:00
Andrey Pangin
0a78b871da API and command-line option (-v) to get profiler version 2018-06-24 03:49:01 +03:00
Andrey Pangin
d56dcf6680 Allow to specify interval units: ms, us, s or K, M, G 2018-06-24 00:56:51 +03:00
Andrey Pangin
129f7be8f0 Use thousands separator for long numbers 2018-06-23 23:49:20 +03:00
Andrey Pangin
980fc3f0bd Use thousands separator for long numbers 2018-06-23 23:36:55 +03:00
Andrey Pangin
300559c9a3 Warn about missing JVM debug symbols 2018-06-23 20:24:23 +03:00
Andrey Pangin
66a4086840 Profiler crashes when changing pid_max in runtime 2018-06-23 20:11:43 +03:00
Andrey Pangin
597448337e Tree builder performance optimization 2018-06-23 03:13:47 +03:00
Andrey Pangin
5d702b7826 #124: Minor change of output format 2018-06-18 18:11:16 +03:00
Andrey Pangin
ac1622c1dc #113: Raise an error when perf_event_open is not available 2018-06-15 01:35:53 +03:00
Andrey Pangin
9ce53696c6 #124: Add units to profiler output 2018-06-15 01:00:27 +03:00
Andrey Pangin
ae87f279c4 Reduce default sampling interval to 10ms 2018-06-14 21:31:10 +03:00
Andrey Pangin
411be9290c #132: FlameGraph includes samples missed due to frame buffer overflow 2018-06-14 20:43:50 +03:00
Andrey Pangin
2edeaf7dbe Makefile automatically sets JAVA_HOME if empty 2018-06-13 19:20:10 +03:00
Andrey Pangin
d8a389a579 Updated jattach to v1.2 for better container support 2018-06-13 02:55:33 +03:00
Andrey Pangin
28dab73701 Flight Recorder format support (.jfr) 2018-06-07 13:47:20 +08:00
Andrey Pangin
a3c647791c Small call tree fixes and cleanup 2018-06-07 11:28:05 +08:00
rpulle
1ee03337d6 Tree expansion performance improvement for Firefox (#122) 2018-06-06 05:22:37 +03:00
Dave Syer
8ea7bddcb2 [doc] Explain how to launch as an agent (#128) 2018-06-01 12:36:36 +03:00
rpulle
af94b0e551 Fixed #120 2018-05-23 21:07:29 +03:00
rpulle
d43c22b8f7 Interactive Call tree and Backtrace tree in HTML format 2018-05-23 02:23:20 +03:00
Andrey Pangin
78c501bcee [refactoring] Named palette 2018-05-21 17:59:42 +03:00
Andrei Pangin
9b077a4f89 Update README.md 2018-05-14 00:14:03 +03:00
Andrey Pangin
c0078cfd4d File mode 2018-05-14 00:03:34 +03:00
Andrey Pangin
62f973cb0b Release 1.3 2018-05-13 23:56:20 +03:00
Andrei Pangin
0a343a346c Allow profiling any Java process when run under root (#116)
jattach automatically changes current user/group to match target process owner
2018-05-11 00:56:23 +03:00
Andrey Pangin
a63169a389 Minor stylistic changes 2018-05-11 00:49:48 +03:00
rpulle
af09330ac9 Option to limit java stack depth to reduce CPU overhead (#115) 2018-05-10 22:30:34 +03:00
Andrey Pangin
3581a7044c Automatically adjust FlameGraph height for the given granularity 2018-05-07 03:20:23 +03:00
Andrey Pangin
5297ee208f #99, #51, #60: Fixed accuracy of heap profiling on latest JDK 8,9,10,11 2018-05-07 00:34:13 +03:00
Andrei Pangin
a5251dd2c7 User-specified hardware breakpoints (#104) 2018-04-04 23:18:45 +03:00
Andrey Pangin
c7088a4845 #101: Use sysconf() to find the PAGE_SIZE. It is not always 4K. 2018-03-28 02:40:19 +03:00
Andrey Pangin
3438591f5a #103: Hint about missing dump options 2018-03-27 23:56:35 +03:00
Andrey Pangin
e23ec274d0 #97: README note about too short profiling interval 2018-03-27 23:50:46 +03:00
Andrey Pangin
43f7ceaa1c #93: Javadoc + Predefined event names 2018-03-27 23:37:31 +03:00
Andrey Pangin
aac31eb33f #93: Java API execute() method supports 'file' argument 2018-03-21 03:33:40 +03:00
Andrey Pangin
7f752d62c5 #98: Do not treat external signals as profiling events 2018-03-21 00:09:38 +03:00
Andrei Pangin
908043310f Update README.md 2018-03-17 22:07:12 +03:00
Andrey Pangin
66105382e6 Added troubleshooting paragraph about "Failed to inject" error. 2018-03-17 22:05:11 +03:00
Andrey Pangin
ab4d4c2ad0 #94: Corrupted SVG due to Locale settings 2018-03-15 00:25:19 +03:00
Andrey Pangin
0c1dd39483 #92: 'make' also builds Java API 2018-03-07 01:08:33 +03:00
102 changed files with 11118 additions and 2651 deletions

0
.gitattributes vendored Executable file → Normal file
View File

4
.gitignore vendored
View File

@@ -1,5 +1,7 @@
/build/
/nbproject/
/out/
/.idea/
*.class
/test/*.class
.vscode
*.iml

View File

@@ -1,6 +1,6 @@
language: cpp
dist: trusty
dist: precise
sudo: required

View File

@@ -1,5 +1,161 @@
# Changelog
## [2.0-b1] - Early access
### Features
- Profile multiple events together (cpu + alloc + lock)
- HTML 5 Flame Graphs: faster rendering, smaller size
- JFR v2 output format, compatible with FlightRecorder API
- Automatically turn profiling on/off at `--begin`/`--end` functions
- Time-to-safepoint profiling
### Improvements
- Unlimited frame buffer. Removed `-b` option and 64K stack traces limit
- Record CPU load in JFR format
### Changes
- Removed non-ASL code. No more CDDL license
## [1.8.3] - 2021-01-06
### Improvements
- libasyncProfiler.dylib symlink on macOS
### Bug fixes
- Fixed possible deadlock on non-HotSpot JVMs
- Gracefully stop profiler when terminating JVM
- Fixed GetStackTrace problem after RedefineClasses
## [1.8.2] - 2020-11-02
### Improvements
- AArch64 build is now provided out of the box
- Compatibility with JDK 15 and JDK 16
### Bug fixes
- More careful native stack walking in wall-clock mode
- `resume` command is not compatible with JFR format
- Wrong allocation sizes on JDK 8u262
## [1.8.1] - 2020-09-05
### Improvements
- Possibility to specify application name instead of `pid` (contributed by @yuzawa-san)
### Bug fixes
- Fixed long attach time and slow class loading on JDK 8
- `UnsatisfiedLinkError` during Java method profiling
- Avoid reading `/proc/kallsyms` when `--all-user` is specified
## [1.8] - 2020-08-10
### Features
- Converters between different output formats:
  - JFR -> nflx (FlameScope)
  - Collapsed stacks -> HTML 5 Flame Graph
### Improvements
- `profiler.sh` no longer requires bash (contributed by @cfstras)
- Fixed long attach time and slow class loading on JDK 8
- Fixed deadlocks in wall-clock profiling mode
- Per-thread reverse Flame Graph and Call Tree
- ARM build now works with ARM and THUMB flavors of JDK
### Changes
- Release package is extracted into a separate folder
## [1.7.1] - 2020-05-14
### Features
- LBR call stack support (available since Haswell)
### Improvements
- `--filter` to profile only specified thread IDs in wall-clock mode
- `--safe-mode` to disable selected stack recovery techniques
## [1.7] - 2020-03-17
### Features
- Profile invocations of arbitrary Java methods
- Filter stack traces by the given name pattern
- Java API to filter monitored threads
- `--cstack`/`--no-cstack` option
### Improvements
- Thread names and Java thread IDs in JFR output
- Wall clock profiler distinguishes RUNNABLE vs. SLEEPING threads
- Stable profiling interval in wall clock mode
- C++ function names as events, e.g. `-e VMThread::execute`
- `check` command to test event availability
- Allow shading of AsyncProfiler API
- Enable CPU profiling on WSL
- Enable allocation profiling on Zing
- Reduce the amount of `unknown_Java` samples
## [1.6] - 2019-09-09
### Features
- Pause/resume profiling
- Allocation profiling support for JDK 12, 13 (contributed by @rraptorr)
### Improvements
- Include all AsyncGetCallTrace failures in the profile
- Parse symbols of JNI libraries loaded in runtime
- The agent autodetects output format by the file extension
- Output file name patterns: `%p` and `%t`
- `-g` option to print method signatures
- `-j` can increase the maximum Java stack depth
- Allocation sampling rate can be adjusted with `-i`
- Improved reliability on macOS
### Changes
- `-f` file names are now relative to the current shell directory
## [1.5] - 2019-01-08
### Features
- Wall-clock profiler: `-e wall`
- `-e itimer` mode for systems that do not support perf_events
- Native stack traces on macOS
- Support for Zing runtime, except allocation profiling
### Improvements
- `--all-user` option to allow profiling with restricted
`perf_event_paranoid` (contributed by @jpbempel)
- `-a` option to annotate method names
- Improved attach to containerized and chroot'ed JVMs
- Native function profiling now accepts non-public symbols
- Better mapping of Java thread names (contributed by @KirillTim)
### Changes
- Changed default profiling engine on macOS
- Fixed the order of stack frames in JFR format
## [1.4] - 2018-06-24
### Features
- Interactive Call tree and Backtrace tree in HTML format (contributed by @rpulle)
- Experimental support for Java Flight Recorder (JFR) compatible output
### Improvements
- Added units: `ms`, `us`, `s` and multipliers: `K`, `M`, `G` for interval argument
- API and command-line option `-v` for profiler version
- Allow profiling containerized JVMs on older kernels
### Changes
- Default CPU sampling interval reduced to 10 ms
- Changed the text format of flat profile
## [1.3] - 2018-05-13
### Features
- Profiling of native functions, e.g. malloc
### Improvements
- JDK 9, 10, 11 support for heap profiling with accurate stack traces
- `root` can now profile Java processes of any user
- `-j` option for limiting Java stack depth
## [1.2] - 2018-03-05
### Features

BIN
JavaHome.class Normal file

Binary file not shown.

106
Makefile Executable file → Normal file
View File

@@ -1,54 +1,112 @@
RELEASE_TAG=1.2
LIB_PROFILER=libasyncProfiler.so
PROFILER_VERSION=2.0-b1
JATTACH_VERSION=1.5
JAVAC_RELEASE_VERSION=6
PACKAGE_NAME=async-profiler-$(PROFILER_VERSION)-$(OS_TAG)-$(ARCH_TAG)
PACKAGE_DIR=/tmp/$(PACKAGE_NAME)
LIB_PROFILER=libasyncProfiler.$(SOEXT)
LIB_PROFILER_SO=libasyncProfiler.so
JATTACH=jattach
PROFILER_JAR=async-profiler.jar
CC=gcc
CFLAGS=-O2
CPP=g++
CPPFLAGS=-O2
API_JAR=async-profiler.jar
CONVERTER_JAR=converter.jar
CFLAGS=-O3 -fno-omit-frame-pointer -fvisibility=hidden
CXXFLAGS=-O3 -fno-omit-frame-pointer -fvisibility=hidden
INCLUDES=-I$(JAVA_HOME)/include
LIBS=-ldl -lpthread
JAVAC=$(JAVA_HOME)/bin/javac
JAR=$(JAVA_HOME)/bin/jar
SOURCES := $(wildcard src/*.cpp)
HEADERS := $(wildcard src/*.h)
API_SOURCES := $(wildcard src/api/one/profiler/*.java)
CONVERTER_SOURCES := $(shell find src/converter -name '*.java')
ifeq ($(JAVA_HOME),)
export JAVA_HOME:=$(shell java -cp . JavaHome)
endif
OS:=$(shell uname -s)
ifeq ($(OS), Darwin)
CPPFLAGS += -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE
CXXFLAGS += -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE
INCLUDES += -I$(JAVA_HOME)/include/darwin
RELEASE_TAG:=$(RELEASE_TAG)-macos-x64
SOEXT=dylib
OS_TAG=macos
else
LIBS += -lrt
INCLUDES += -I$(JAVA_HOME)/include/linux
RELEASE_TAG:=$(RELEASE_TAG)-linux-x64
SOEXT=so
ifeq ($(findstring musl,$(shell ldd /bin/ls)),musl)
OS_TAG=linux-musl
else
OS_TAG=linux
endif
endif
ARCH:=$(shell uname -m)
ifeq ($(ARCH),x86_64)
ARCH_TAG=x64
else
ifeq ($(findstring arm,$(ARCH)),arm)
ARCH_TAG=arm
else
ifeq ($(findstring aarch64,$(ARCH)),aarch64)
ARCH_TAG=aarch64
else
ARCH_TAG=x86
endif
endif
endif
.PHONY: all release test clean
all: build build/$(LIB_PROFILER) build/$(JATTACH)
all: build build/$(LIB_PROFILER) build/$(JATTACH) build/$(API_JAR) build/$(CONVERTER_JAR)
release: async-profiler-$(RELEASE_TAG).zip
release: build $(PACKAGE_NAME).tar.gz
async-profiler-$(RELEASE_TAG).zip: build build/$(LIB_PROFILER) build/$(JATTACH) \
build/$(PROFILER_JAR) profiler.sh LICENSE *.md
zip -r $@ $^
$(PACKAGE_NAME).tar.gz: build/$(LIB_PROFILER) build/$(JATTACH) \
build/$(API_JAR) build/$(CONVERTER_JAR) \
profiler.sh LICENSE *.md
mkdir -p $(PACKAGE_DIR)
cp -RP build profiler.sh LICENSE *.md $(PACKAGE_DIR)
chmod -R 755 $(PACKAGE_DIR)
chmod 644 $(PACKAGE_DIR)/LICENSE $(PACKAGE_DIR)/*.md $(PACKAGE_DIR)/build/*.jar
tar cvzf $@ -C $(PACKAGE_DIR)/.. $(PACKAGE_NAME)
rm -r $(PACKAGE_DIR)
%.$(SOEXT): %.so
-ln -s $(<F) $@
build:
mkdir -p build
build/$(LIB_PROFILER): src/*.cpp src/*.h
$(CPP) $(CPPFLAGS) $(INCLUDES) -fPIC -shared -o $@ src/*.cpp -ldl -lpthread
build/$(LIB_PROFILER_SO): $(SOURCES) $(HEADERS)
$(CXX) $(CXXFLAGS) -DPROFILER_VERSION=\"$(PROFILER_VERSION)\" $(INCLUDES) -fPIC -shared -o $@ $(SOURCES) $(LIBS)
build/$(JATTACH): src/jattach/jattach.c
$(CC) $(CFLAGS) -o $@ $^
$(CC) $(CFLAGS) -DJATTACH_VERSION=\"$(JATTACH_VERSION)\" -o $@ $^
build/$(PROFILER_JAR): src/java/one/profiler/*.java
mkdir -p build/classes
$(JAVAC) -source 6 -target 6 -d build/classes $^
$(JAR) cvf $@ -C build/classes .
rm -rf build/classes
build/$(API_JAR): $(API_SOURCES)
mkdir -p build/api
$(JAVAC) -source $(JAVAC_RELEASE_VERSION) -target $(JAVAC_RELEASE_VERSION) -d build/api $^
$(JAR) cvf $@ -C build/api .
$(RM) -r build/api
build/$(CONVERTER_JAR): $(CONVERTER_SOURCES) src/converter/MANIFEST.MF
mkdir -p build/converter
$(JAVAC) -source 7 -target 7 -d build/converter $(CONVERTER_SOURCES)
$(JAR) cvfm $@ src/converter/MANIFEST.MF -C build/converter .
$(RM) -r build/converter
test: all
test/smoke-test.sh
test/thread-smoke-test.sh
test/alloc-smoke-test.sh
test/load-library-test.sh
echo "All tests passed"
clean:
rm -rf build
$(RM) -r build

268
README.md Executable file → Normal file
View File

@@ -1,283 +1,53 @@
# async-profiler
[![Build Status](https://travis-ci.org/jvm-profiling-tools/async-profiler.svg?branch=master)](https://travis-ci.org/jvm-profiling-tools/async-profiler)
This project is a low overhead sampling profiler for Java
that does not suffer from [Safepoint bias problem](http://psy-lob-saw.blogspot.ru/2016/02/why-most-sampling-java-profilers-are.html).
It features HotSpot-specific APIs to collect stack traces
and to track memory allocations. The profiler works with
OpenJDK, Oracle JDK and other Java runtimes based on HotSpot JVM.
OpenJDK, Oracle JDK and other Java runtimes based on the HotSpot JVM.
async-profiler can trace the following kinds of events:
- CPU cycles
- Hardware and Software performance counters like cache misses, branch misses, page faults, context switches etc.
- Allocations in Java Heap
- Contended lock attempts, including both Java object monitors and ReentrantLocks
## Usage
## CPU profiling
See our [Wiki](https://github.com/jvm-profiling-tools/async-profiler/wiki) or [3 hours playlist](https://www.youtube.com/playlist?list=PLNCLTEx3B8h4Yo_WvKWdLvI9mj1XpTKBr) to learn about the full set of features.
In this mode profiler collects stack trace samples that include **Java** methods,
**native** calls, **JVM** code and **kernel** functions.
## Download
The general approach is receiving call stacks generated by `perf_events`
and matching them up with call stacks generated by `AsyncGetCallTrace`,
in order to produce an accurate profile of both Java and native code.
Additionally, async-profiler provides a workaround to recover stack traces
in some [corner cases](https://bugs.openjdk.java.net/browse/JDK-8178287)
where `AsyncGetCallTrace` fails.
Latest release (1.8.3):
This approach has the following advantages compared to using `perf_events`
directly with a Java agent that translates addresses to Java method names:
- Linux x64 (glibc): [async-profiler-1.8.3-linux-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-linux-x64.tar.gz)
- Linux x86 (glibc): [async-profiler-1.8.3-linux-x86.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-linux-x86.tar.gz)
- Linux x64 (musl): [async-profiler-1.8.3-linux-musl-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-linux-musl-x64.tar.gz)
- Linux ARM: [async-profiler-1.8.3-linux-arm.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-linux-arm.tar.gz)
- Linux AArch64: [async-profiler-1.8.3-linux-aarch64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-linux-aarch64.tar.gz)
- macOS x64: [async-profiler-1.8.3-macos-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.8.3/async-profiler-1.8.3-macos-x64.tar.gz)
* Works on older Java versions because it doesn't require
`-XX:+PreserveFramePointer`, which is only available in JDK 8u60 and later.
[Early access](https://github.com/jvm-profiling-tools/async-profiler/releases/tag/v2.0-b1) (2.0-b1):
* Does not introduce the performance overhead from `-XX:+PreserveFramePointer`,
which can in rare cases be as high as 10%.
- Linux x64 (glibc): [async-profiler-2.0-b1-linux-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v2.0-b1/async-profiler-2.0-b1-linux-x64.tar.gz)
- macOS x64: [async-profiler-2.0-b1-macos-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v2.0-b1/async-profiler-2.0-b1-macos-x64.tar.gz)
* Does not require generating a map file to map Java code addresses to method
names.
* Works with interpreter frames.
* Does not require writing out a perf.data file for further processing in
user space scripts.
## ALLOCATION profiling
Instead of detecting CPU-consuming code, the profiler can be configured
to collect call sites where the largest amount of heap memory is allocated.
async-profiler does not use intrusive techniques like bytecode instrumentation
or expensive DTrace probes which have significant performance impact.
It also does not affect Escape Analysis or prevent JIT optimizations
like allocation elimination. Only actual heap allocations are measured.
The profiler features TLAB-driven sampling. It relies on HotSpot-specific
callbacks to receive two kinds of notifications:
- when an object is allocated in a newly created TLAB;
- when an object is allocated on a slow path outside TLAB.
This means not each allocation is counted, but only allocations every _N_ kB,
where _N_ is the average size of TLAB. This makes heap sampling very cheap
and suitable for production. On the other hand, the collected data
may be incomplete, though in practice it will often reflect the top allocation
sources.
Unlike Java Mission Control, which uses a similar approach, async-profiler
does not require Java Flight Recorder or any other JDK commercial feature.
It is completely based on open source technologies and it works with OpenJDK.
The minimum supported JDK version is 7u40 where the TLAB callbacks appeared.
Heap profiler requires HotSpot debug symbols. Oracle JDK already has them
embedded in `libjvm.so`, but in OpenJDK builds they are typically shipped
in a separate package. For example, to install OpenJDK debug symbols on
Debian / Ubuntu, run
```
# apt-get install openjdk-8-dbg
```
[Previous releases](https://github.com/jvm-profiling-tools/async-profiler/releases)
## Supported platforms
- **Linux** / x64 / x86 / ARM / AArch64
- **macOS** / x64
Note: macOS profiling is limited only to Java code, since native stack walking relies on `perf_events` API which is available only on Linux platforms.
Note: macOS profiling is limited to user space code only.
## Building
Build status: [![Build Status](https://travis-ci.org/jvm-profiling-tools/async-profiler.svg?branch=master)](https://travis-ci.org/jvm-profiling-tools/async-profiler)
Make sure the `JAVA_HOME` environment variable points to your JDK installation,
and then run `make`. GCC is required. After building, the profiler agent binary
will be in the `build` subdirectory. Additionally, a small application `jattach`
that can load the agent into the target process will also be compiled to the
`build` subdirectory.
## Basic Usage
As of Linux 4.6, capturing kernel call stacks using `perf_events` from a non-
root process requires setting two runtime variables. You can set them using
sysctl or as follows:
```
# echo 1 > /proc/sys/kernel/perf_event_paranoid
# echo 0 > /proc/sys/kernel/kptr_restrict
```
To run the agent and pass commands to it, the helper script `profiler.sh`
is provided. A typical workflow would be to launch your Java application,
attach the agent and start profiling, exercise your performance scenario, and
then stop profiling. The agent's output, including the profiling results, will
be displayed in the Java application's standard output.
Example:
```
$ jps
9234 Jps
8983 Computey
$ ./profiler.sh start 8983
$ ./profiler.sh stop 8983
```
Alternatively, you may specify `-d` (duration) argument to profile
the application for a fixed period of time with a single command.
```
$ ./profiler.sh -d 30 8983
```
By default, the profiling frequency is 1000Hz (every 1ms of CPU time).
Here is a sample of the output printed to the Java application's terminal:
```
--- Execution profile ---
Total: 687
Unknown (native): 1 (0.15%)
Samples: 679 (98.84%)
[ 0] Primes.isPrime
[ 1] Primes.primesThread
[ 2] Primes.access$000
[ 3] Primes$1.run
[ 4] java.lang.Thread.run
... a lot of output omitted for brevity ...
679 (98.84%) Primes.isPrime
4 (0.58%) __do_softirq
... more output omitted ...
```
This indicates that the hottest method was `Primes.isPrime`, and the hottest
call stack leading to it comes from `Primes.primesThread`.
## Flame Graph visualization
async-profiler provides out-of-the-box [Flame Graph](https://github.com/BrendanGregg/FlameGraph) support.
Specify `-o svg` argument to dump profiling results as an interactive SVG
immediately viewable in all mainstream browsers.
Also, SVG output format will be chosen automatically if the target
filename ends with `.svg`.
```
$ jps
9234 Jps
8983 Computey
$ ./profiler.sh -d 30 -f /tmp/flamegraph.svg 8983
```
![Example](https://github.com/jvm-profiling-tools/async-profiler/blob/master/demo/SwingSet2.svg)
## Profiler Options
The following is a complete list of the command-line options accepted by
`profiler.sh` script.
* `start` - starts profiling in semi-automatic mode, i.e. profiler will run
until `stop` command is explicitly called.
* `stop` - stops profiling and prints the report.
* `status` - prints profiling status: whether profiler is active and
for how long.
* `list` - show the list of available profiling events. This option still
requires PID, since supported events may differ depending on JVM version.
* `-d N` - the profiling duration, in seconds. If no `start`, `stop`
or `status` option is given, the profiler will run for the specified period
of time and then automatically stop.
Example: `./profiler.sh -d 30 8983`
* `-e event` - the profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc.
Use `list` to see the complete list of available events.
In allocation profiling mode the top frame of every call trace is the class
of the allocated object, and the counter is the heap pressure (the total size
of allocated TLABs or objects outside TLAB).
In lock profiling mode the top frame is the class of lock/monitor, and
the counter is the number of nanoseconds it took to enter this lock/monitor.
* `-i N` - sets the profiling interval, in nanoseconds. Only CPU active time
is counted. No samples are collected while CPU is idle. The default is
1000000 (1ms).
Example: `./profiler.sh -i 100000 8983`
* `-b N` - sets the frame buffer size, in the number of Java
method ids that should fit in the buffer. If you receive messages about an
insufficient frame buffer size, increase this value from the default.
Example: `./profiler.sh -b 5000000 8983`
* `-t` - profile threads separately. Each stack trace will end with a frame
that denotes a single thread.
Example: `./profiler.sh -t 8983`
* `-s` - print simple class names instead of FQN.
* `-o fmt[,fmt...]` - specifies what information to dump when profiling ends.
This is a comma-separated list of the following options:
- `summary` - dump basic profiling statistics;
- `traces[=N]` - dump call traces (at most N samples);
- `flat[=N]` - dump flat profile (top N hot methods);
- `collapsed[=C]` - dump collapsed call traces in the format used by
[FlameGraph](https://github.com/brendangregg/FlameGraph) script. This is
a collection of call stacks, where each line is a semicolon separated list
of frames followed by a counter.
- `svg[=C]` - produce Flame Graph in SVG format.
`C` is a counter type:
- `samples` - the counter is a number of samples for the given trace;
- `total` - the counter is a total value of collected metric, e.g. total allocation size.
The default format is `summary,traces=200,flat=200`.
* `--title TITLE`, `--width PX`, `--height PX`, `--minwidth PX`, `--reverse` - FlameGraph parameters.
Example: `./profiler.sh -f profile.svg --title "Sample CPU profile" --minwidth 0.5 8983`
* `-f FILENAME` - the file name to dump the profile information to.
Example: `./profiler.sh -o collapsed -f /tmp/traces.txt 8983`
## Restrictions/Limitations
* On most Linux systems, `perf_events` captures call stacks with a maximum depth
of 127 frames. On recent Linux kernels, this can be configured using
`sysctl kernel.perf_event_max_stack` or by writing to the
`/proc/sys/kernel/perf_event_max_stack` file.
* Profiler allocates 8kB perf_event buffer for each thread of the target process.
Make sure `/proc/sys/kernel/perf_event_mlock_kb` value is large enough
(more than `8 * threads`) when running under unprivileged user.
Otherwise the message _"perf_event mmap failed: Operation not permitted"_
will be printed, and no native stack traces will be collected.
* There is no bullet-proof guarantee that the `perf_events` overflow signal
is delivered to the Java thread in a way that guarantees no other code has run,
which means that in some rare cases, the captured Java stack might not match
the captured native (user+kernel) stack.
* You will not see the non-Java frames _preceding_ the Java frames on the
stack. For example, if `start_thread` called `JavaMain` and then your Java
code started running, you will not see the first two frames in the resulting
stack. On the other hand, you _will_ see non-Java frames (user and kernel)
invoked by your Java code.
* No Java stacks will be collected if `-XX:MaxJavaStackTraceDepth` is zero
or negative.
## Troubleshooting
`Could not start attach mechanism: No such file or directory` means that the profiler cannot establish communication with the target JVM through UNIX domain socket.
For the profiler to be able to access JVM, make sure
1. You run profiler under exactly the same user as the owner of target JVM process.
2. `/tmp` directory of Java process is physically the same directory as `/tmp` of your shell.
3. JVM is not run with `-XX:+DisableAttachMechanism` option.
---
`[frame_buffer_overflow]` in the output means there was not enough space
to store all call traces. Consider increasing frame buffer size
with `-b` option.

View File

@@ -1,358 +0,0 @@
COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
1. Definitions.
1.1. "Contributor" means each individual or entity that
creates or contributes to the creation of Modifications.
1.2. "Contributor Version" means the combination of the
Original Software, prior Modifications used by a
Contributor (if any), and the Modifications made by that
particular Contributor.
1.3. "Covered Software" means (a) the Original Software, or
(b) Modifications, or (c) the combination of files
containing Original Software with files containing
Modifications, in each case including portions thereof.
1.4. "Executable" means the Covered Software in any form
other than Source Code.
1.5. "Initial Developer" means the individual or entity
that first makes Original Software available under this
License.
1.6. "Larger Work" means a work which combines Covered
Software or portions thereof with code not governed by the
terms of this License.
1.7. "License" means this document.
1.8. "Licensable" means having the right to grant, to the
maximum extent possible, whether at the time of the initial
grant or subsequently acquired, any and all of the rights
conveyed herein.
1.9. "Modifications" means the Source Code and Executable
form of any of the following:
A. Any file that results from an addition to,
deletion from or modification of the contents of a
file containing Original Software or previous
Modifications;
B. Any new file that contains any part of the
Original Software or previous Modification; or
C. Any new file that is contributed or otherwise made
available under the terms of this License.
1.10. "Original Software" means the Source Code and
Executable form of computer software code that is
originally released under this License.
1.11. "Patent Claims" means any patent claim(s), now owned
or hereafter acquired, including without limitation,
method, process, and apparatus claims, in any patent
Licensable by grantor.
1.12. "Source Code" means (a) the common form of computer
software code in which modifications are made and (b)
associated documentation included in or with such code.
1.13. "You" (or "Your") means an individual or a legal
entity exercising rights under, and complying with all of
the terms of, this License. For legal entities, "You"
includes any entity which controls, is controlled by, or is
under common control with You. For purposes of this
definition, "control" means (a) the power, direct or
indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (b) ownership
of more than fifty percent (50%) of the outstanding shares
or beneficial ownership of such entity.
2. License Grants.
2.1. The Initial Developer Grant.
Conditioned upon Your compliance with Section 3.1 below and
subject to third party intellectual property claims, the
Initial Developer hereby grants You a world-wide,
royalty-free, non-exclusive license:
(a) under intellectual property rights (other than
patent or trademark) Licensable by Initial Developer,
to use, reproduce, modify, display, perform,
sublicense and distribute the Original Software (or
portions thereof), with or without Modifications,
and/or as part of a Larger Work; and
(b) under Patent Claims infringed by the making,
using or selling of Original Software, to make, have
made, use, practice, sell, and offer for sale, and/or
otherwise dispose of the Original Software (or
portions thereof).
(c) The licenses granted in Sections 2.1(a) and (b)
are effective on the date Initial Developer first
distributes or otherwise makes the Original Software
available to a third party under the terms of this
License.
(d) Notwithstanding Section 2.1(b) above, no patent
license is granted: (1) for code that You delete from
the Original Software, or (2) for infringements
caused by: (i) the modification of the Original
Software, or (ii) the combination of the Original
Software with other software or devices.
2.2. Contributor Grant.
Conditioned upon Your compliance with Section 3.1 below and
subject to third party intellectual property claims, each
Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than
patent or trademark) Licensable by Contributor to
use, reproduce, modify, display, perform, sublicense
and distribute the Modifications created by such
Contributor (or portions thereof), either on an
unmodified basis, with other Modifications, as
Covered Software and/or as part of a Larger Work; and
(b) under Patent Claims infringed by the making,
using, or selling of Modifications made by that
Contributor either alone and/or in combination with
its Contributor Version (or portions of such
combination), to make, use, sell, offer for sale,
have made, and/or otherwise dispose of: (1)
Modifications made by that Contributor (or portions
thereof); and (2) the combination of Modifications
made by that Contributor with its Contributor Version
(or portions of such combination).
(c) The licenses granted in Sections 2.2(a) and
2.2(b) are effective on the date Contributor first
distributes or otherwise makes the Modifications
available to a third party.
(d) Notwithstanding Section 2.2(b) above, no patent
license is granted: (1) for any code that Contributor
has deleted from the Contributor Version; (2) for
infringements caused by: (i) third party
modifications of Contributor Version, or (ii) the
combination of Modifications made by that Contributor
with other software (except as part of the
Contributor Version) or other devices; or (3) under
Patent Claims infringed by Covered Software in the
absence of Modifications made by that Contributor.
3. Distribution Obligations.
3.1. Availability of Source Code.
Any Covered Software that You distribute or otherwise make
available in Executable form must also be made available in
Source Code form and that Source Code form must be
distributed only under the terms of this License. You must
include a copy of this License with every copy of the
Source Code form of the Covered Software You distribute or
otherwise make available. You must inform recipients of any
such Covered Software in Executable form as to how they can
obtain such Covered Software in Source Code form in a
reasonable manner on or through a medium customarily used
for software exchange.
3.2. Modifications.
The Modifications that You create or to which You
contribute are governed by the terms of this License. You
represent that You believe Your Modifications are Your
original creation(s) and/or You have sufficient rights to
grant the rights conveyed by this License.
3.3. Required Notices.
You must include a notice in each of Your Modifications
that identifies You as the Contributor of the Modification.
You may not remove or alter any copyright, patent or
trademark notices contained within the Covered Software, or
any notices of licensing or any descriptive text giving
attribution to any Contributor or the Initial Developer.
3.4. Application of Additional Terms.
You may not offer or impose any terms on any Covered
Software in Source Code form that alters or restricts the
applicable version of this License or the recipients'
rights hereunder. You may choose to offer, and to charge a
fee for, warranty, support, indemnity or liability
obligations to one or more recipients of Covered Software.
However, you may do so only on Your own behalf, and not on
behalf of the Initial Developer or any Contributor. You
must make it absolutely clear that any such warranty,
support, indemnity or liability obligation is offered by
You alone, and You hereby agree to indemnify the Initial
Developer and every Contributor for any liability incurred
by the Initial Developer or such Contributor as a result of
warranty, support, indemnity or liability terms You offer.
3.5. Distribution of Executable Versions.
You may distribute the Executable form of the Covered
Software under the terms of this License or under the terms
of a license of Your choice, which may contain terms
different from this License, provided that You are in
compliance with the terms of this License and that the
license for the Executable form does not attempt to limit
or alter the recipient's rights in the Source Code form
from the rights set forth in this License. If You
distribute the Covered Software in Executable form under a
different license, You must make it absolutely clear that
any terms which differ from this License are offered by You
alone, not by the Initial Developer or Contributor. You
hereby agree to indemnify the Initial Developer and every
Contributor for any liability incurred by the Initial
Developer or such Contributor as a result of any such terms
You offer.
3.6. Larger Works.
You may create a Larger Work by combining Covered Software
with other code not governed by the terms of this License
and distribute the Larger Work as a single product. In such
a case, You must make sure the requirements of this License
are fulfilled for the Covered Software.
4. Versions of the License.
4.1. New Versions.
Sun Microsystems, Inc. is the initial license steward and
may publish revised and/or new versions of this License
from time to time. Each version will be given a
distinguishing version number. Except as provided in
Section 4.3, no one other than the license steward has the
right to modify this License.
4.2. Effect of New Versions.
You may always continue to use, distribute or otherwise
make the Covered Software available under the terms of the
version of the License under which You originally received
the Covered Software. If the Initial Developer includes a
notice in the Original Software prohibiting it from being
distributed or otherwise made available under any
subsequent version of the License, You must distribute and
make the Covered Software available under the terms of the
version of the License under which You originally received
the Covered Software. Otherwise, You may also choose to
use, distribute or otherwise make the Covered Software
available under the terms of any subsequent version of the
License published by the license steward.
4.3. Modified Versions.
When You are an Initial Developer and You want to create a
new license for Your Original Software, You may create and
use a modified version of this License if You: (a) rename
the license and remove any references to the name of the
license steward (except to note that the license differs
from this License); and (b) otherwise make it clear that
the license contains terms which differ from this License.
5. DISCLAIMER OF WARRANTY.
COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY
COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF
ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
DISCLAIMER.
6. TERMINATION.
6.1. This License and the rights granted hereunder will
terminate automatically if You fail to comply with terms
herein and fail to cure such breach within 30 days of
becoming aware of the breach. Provisions which, by their
nature, must remain in effect beyond the termination of
this License shall survive.
6.2. If You assert a patent infringement claim (excluding
declaratory judgment actions) against Initial Developer or
a Contributor (the Initial Developer or Contributor against
whom You assert such claim is referred to as "Participant")
alleging that the Participant Software (meaning the
Contributor Version where the Participant is a Contributor
or the Original Software where the Participant is the
Initial Developer) directly or indirectly infringes any
patent, then any and all rights granted directly or
indirectly to You by such Participant, the Initial
Developer (if the Initial Developer is not the Participant)
and all Contributors under Sections 2.1 and/or 2.2 of this
License shall, upon 60 days notice from Participant
terminate prospectively and automatically at the expiration
of such 60 day notice period, unless if within such 60 day
period You withdraw Your claim with respect to the
Participant Software against such Participant either
unilaterally or pursuant to a written agreement with
Participant.
6.3. In the event of termination under Sections 6.1 or 6.2
above, all end user licenses that have been validly granted
by You or any distributor hereunder prior to termination
(excluding licenses granted to You by any distributor)
shall survive termination.
7. LIMITATION OF LIABILITY.
UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
(INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
APPLY TO YOU.
8. U.S. GOVERNMENT END USERS.
The Covered Software is a "commercial item," as that term is
defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
computer software" (as that term is defined at 48 C.F.R. §
252.227-7014(a)(1)) and "commercial computer software
documentation" as such terms are used in 48 C.F.R. 12.212 (Sept.
1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1
through 227.7202-4 (June 1995), all U.S. Government End Users
acquire Covered Software with only those rights set forth herein.
This U.S. Government Rights clause is in lieu of, and supersedes,
any other FAR, DFAR, or other clause or provision that addresses
Government rights in computer software under this License.
9. MISCELLANEOUS.
This License represents the complete agreement concerning subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the
extent necessary to make it enforceable. This License shall be
governed by the law of the jurisdiction specified in a notice
contained within the Original Software (except to the extent
applicable law, if any, provides otherwise), excluding such
jurisdiction's conflict-of-law provisions. Any litigation
relating to this License shall be subject to the jurisdiction of
the courts located in the jurisdiction and venue specified in a
notice contained within the Original Software, with the losing
party responsible for costs, including, without limitation, court
costs and reasonable attorneys' fees and expenses. The
application of the United Nations Convention on Contracts for the
International Sale of Goods is expressly excluded. Any law or
regulation which provides that the language of a contract shall
be construed against the drafter shall not apply to this License.
You agree that You alone are responsible for compliance with the
United States export administration regulations (and the export
control laws and regulation of any other countries) when You use,
distribute or otherwise make available any Covered Software.
10. RESPONSIBILITY FOR CLAIMS.
As between Initial Developer and the Contributors, each party is
responsible for claims and damages arising, directly or
indirectly, out of its utilization of rights under this License
and You agree to work with Initial Developer and Contributors to
distribute such responsibility on an equitable basis. Nothing
herein is intended or shall be deemed to constitute any admission
of liability.

102
pom.xml Normal file
View File

@@ -0,0 +1,102 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Maven coordinates and descriptive metadata of the published jar -->
<groupId>tools.profiler</groupId>
<artifactId>async-profiler</artifactId>
<version>1.8.3</version>
<packaging>jar</packaging>
<name>async-profiler</name>
<url>https://profiler.tools</url>
<description>Low overhead sampling profiler for Java</description>
<!-- License, source control and developer information -->
<licenses>
<license>
<name>Apache License Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<url>https://github.com/jvm-profiling-tools/async-profiler</url>
<connection>scm:git:git@github.com:jvm-profiling-tools/async-profiler.git</connection>
<developerConnection>scm:git:git@github.com:jvm-profiling-tools/async-profiler.git</developerConnection>
</scm>
<developers>
<developer>
<id>apangin</id>
<name>Andrei Pangin</name>
<email>noreply@pangin.pro</email>
</developer>
</developers>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<!-- Java sources are taken from src/api instead of the Maven default src/main/java -->
<sourceDirectory>src/api</sourceDirectory>
<plugins>
<!-- Compile with Java 1.6 source and target levels -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<!-- Attach a sources jar to the build (jar-no-fork goal) -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Attach a javadoc jar to the build -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Sign the build artifacts with GPG; the sign goal is bound to the verify phase -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.6</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<!-- Deployment targets on oss.sonatype.org: a snapshots repository and a release staging
     repository. Both use the server id "ossrh". -->
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2</url>
</repository>
</distributionManagement>
</project>

View File

@@ -1,10 +1,14 @@
#!/bin/bash
#!/bin/sh
set -eu
usage() {
echo "Usage: $0 [action] [options] <pid>"
echo "Actions:"
echo " start start profiling and return immediately"
echo " resume resume profiling without resetting collected data"
echo " stop stop profiling"
echo " jstack get a thread dump"
echo " check check if the specified profiling event is available"
echo " status print profiling status"
echo " list list profiling events supported by the target JVM"
echo " collect collect profile for the specified period of time"
@@ -14,53 +18,65 @@ usage() {
echo " -d duration run profiling for <duration> seconds"
echo " -f filename dump output to <filename>"
echo " -i interval sampling interval in nanoseconds"
echo " -b bufsize frame buffer size"
echo " -j jstackdepth maximum Java stack depth"
echo " -t profile different threads separately"
echo " -s simple class names instead of FQN"
echo " -o fmt[,fmt...] output format: summary|traces|flat|collapsed|svg"
echo " -g print method signatures"
echo " -a annotate Java method names"
echo " -o fmt output format: flat|collapsed|html|tree|jfr"
echo " -I include output only stack traces containing the specified pattern"
echo " -X exclude exclude stack traces with the specified pattern"
echo " -v, --version display version string"
echo ""
echo " --title string SVG title"
echo " --width px SVG width"
echo " --height px SVG frame height"
echo " --minwidth px skip frames smaller than px"
echo " --reverse generate stack-reversed FlameGraph"
echo " --title string FlameGraph title"
echo " --minwidth pct skip frames smaller than pct%"
echo " --reverse generate stack-reversed FlameGraph / Call tree"
echo ""
echo " --all-kernel only include kernel-mode events"
echo " --all-user only include user-mode events"
echo " --cstack mode how to traverse C stack: fp|lbr|no"
echo " --begin function begin profiling when function is executed"
echo " --end function end profiling when function is executed"
echo ""
echo "<pid> is a numeric process ID of the target JVM"
echo " or 'jps' keyword to find running JVM automatically using jps tool"
echo " or 'jps' keyword to find running JVM automatically"
echo " or the application's name as it would appear in the jps tool"
echo ""
echo "Example: $0 -d 30 -f profile.svg 3456"
echo " $0 start -i 999000 jps"
echo " $0 stop -o summary,flat jps"
echo " $0 stop -o flat jps"
echo " $0 -d 5 -e alloc MyAppName"
exit 1
}
mirror_output() {
# Mirror output from temporary file to local terminal
if [[ $USE_TMP ]]; then
if [[ -f $FILE ]]; then
cat $FILE
rm $FILE
if [ "$USE_TMP" = true ]; then
if [ -f "$FILE" ]; then
cat "$FILE"
rm "$FILE"
fi
fi
}
check_if_terminated() {
if ! kill -0 $PID 2> /dev/null; then
if ! kill -0 "$PID" 2> /dev/null; then
mirror_output
exit 0
fi
}
jattach() {
$JATTACH $PID load "$PROFILER" true "$1" > /dev/null
set +e
"$JATTACH" "$PID" load "$PROFILER" true "$1" > /dev/null
RET=$?
set -e
# Check if jattach failed
if [ $RET -ne 0 ]; then
if [ $RET -eq 255 ]; then
echo "Failed to inject profiler into $PID"
UNAME_S=$(uname -s)
if [ "$UNAME_S" == "Darwin" ]; then
if [ "$(uname -s)" = "Darwin" ]; then
otool -L "$PROFILER"
else
ldd "$PROFILER"
@@ -72,41 +88,38 @@ jattach() {
mirror_output
}
function abspath() {
UNAME_S=$(uname -s)
if [ "$UNAME_S" == "Darwin" ]; then
perl -MCwd -e 'print Cwd::abs_path shift' $1
else
readlink -f $1
fi
}
OPTIND=1
SCRIPT_DIR=$(dirname $0)
SCRIPT_DIR="$(cd "$(dirname "$0")" > /dev/null 2>&1; pwd -P)"
JATTACH=$SCRIPT_DIR/build/jattach
PROFILER=$(abspath $SCRIPT_DIR/build/libasyncProfiler.so)
PROFILER=$SCRIPT_DIR/build/libasyncProfiler.so
ACTION="collect"
EVENT="cpu"
DURATION="60"
FILE=""
USE_TMP="true"
INTERVAL=""
FRAMEBUF=""
THREADS=""
OUTPUT=""
FORMAT=""
PARAMS=""
PID=""
while [[ $# -gt 0 ]]; do
while [ $# -gt 0 ]; do
case $1 in
-h|"-?")
usage
;;
start|stop|status|list|collect)
start|resume|stop|check|status|list|collect)
ACTION="$1"
;;
jstack)
ACTION="start"
EVENT="jstack"
PARAMS="$PARAMS,threads"
;;
-v|--version)
ACTION="version"
;;
-e)
EVENT="$2"
EVENT="$(echo "$2" | sed 's/,/,event=/g')"
shift
;;
-d)
@@ -115,80 +128,131 @@ while [[ $# -gt 0 ]]; do
;;
-f)
FILE="$2"
unset USE_TMP
USE_TMP=false
shift
;;
-i)
INTERVAL=",interval=$2"
PARAMS="$PARAMS,interval=$2"
shift
;;
-b)
FRAMEBUF=",framebuf=$2"
-j)
PARAMS="$PARAMS,jstackdepth=$2"
shift
;;
-t)
THREADS=",threads"
PARAMS="$PARAMS,threads"
;;
-s)
FORMAT="$FORMAT,simple"
;;
-g)
FORMAT="$FORMAT,sig"
;;
-a)
FORMAT="$FORMAT,ann"
;;
-o)
OUTPUT="$2"
shift
;;
-I|--include)
FORMAT="$FORMAT,include=$2"
shift
;;
-X|--exclude)
FORMAT="$FORMAT,exclude=$2"
shift
;;
--filter)
FILTER="$(echo "$2" | sed 's/,/;/g')"
FORMAT="$FORMAT,filter=$FILTER"
shift
;;
--title)
# escape XML special characters and comma
TITLE=${2//&/&amp;}
TITLE=${TITLE//</&lt;}
TITLE=${TITLE//>/&gt;}
TITLE=${TITLE//,/&#44;}
TITLE="$(echo "$2" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/,/\&#44;/g')"
FORMAT="$FORMAT,title=$TITLE"
shift
;;
--width|--height|--minwidth)
FORMAT="$FORMAT,${1:2}=$2"
FORMAT="$FORMAT,${1#--}=$2"
shift
;;
--reverse)
FORMAT="$FORMAT,reverse"
;;
--all-kernel)
PARAMS="$PARAMS,allkernel"
;;
--all-user)
PARAMS="$PARAMS,alluser"
;;
--cstack|--call-graph)
PARAMS="$PARAMS,cstack=$2"
shift
;;
--begin|--end)
PARAMS="$PARAMS,${1#--}=$2"
shift
;;
--safe-mode)
PARAMS="$PARAMS,safemode=$2"
shift
;;
[0-9]*)
PID="$1"
;;
jps)
# A shortcut for getting PID of a running Java application
# -XX:+PerfDisableSharedMem prevents jps from appearing in its own list
PID=$(jps -q -J-XX:+PerfDisableSharedMem)
PID=$(pgrep -n java || jps -q -J-XX:+PerfDisableSharedMem)
if [ "$PID" = "" ]; then
echo "No Java process could be found!"
fi
;;
-*)
echo "Unrecognized option: $1"
usage
;;
*)
echo "Unrecognized option: $1"
usage
;;
if [ $# -eq 1 ]; then
# the last argument is the application name as it would appear in the jps tool
PID=$(jps -J-XX:+PerfDisableSharedMem | grep " $1$" | head -n 1 | cut -d ' ' -f 1)
if [ "$PID" = "" ]; then
echo "No Java process '$1' could be found!"
fi
else
echo "Unrecognized option: $1"
usage
fi
;;
esac
shift
done
[[ "$PID" == "" ]] && usage
# if no -f argument is given, use temporary file to transfer output to caller terminal
if [[ $USE_TMP ]]; then
FILE=$(mktemp /tmp/async-profiler.XXXXXXXX)
if [ "$PID" = "" ] && [ "$ACTION" != "version" ]; then
usage
fi
# select default output format
if [[ "$OUTPUT" == "" ]]; then
if [[ $FILE == *.svg ]]; then
OUTPUT="svg"
elif [[ $FILE == *.collapsed ]] || [[ $FILE == *.folded ]]; then
OUTPUT="collapsed"
else
OUTPUT="summary,traces=200,flat=200"
fi
# If no -f argument is given, use temporary file to transfer output to caller terminal.
# Let the target process create the file in case this script is run by superuser.
if [ "$USE_TMP" = true ]; then
FILE=/tmp/async-profiler.$$.$PID
else
case "$FILE" in
/*)
# Path is absolute
;;
*)
# Output file is written by the target process. Make the path absolute to avoid confusion.
FILE=$PWD/$FILE
;;
esac
fi
case $ACTION in
start)
jattach "start,event=$EVENT,file=$FILE$INTERVAL$FRAMEBUF$THREADS,$OUTPUT$FORMAT"
start|resume|check)
jattach "$ACTION,event=$EVENT,file=$FILE,$OUTPUT$FORMAT$PARAMS"
;;
stop)
jattach "stop,file=$FILE,$OUTPUT$FORMAT"
@@ -200,11 +264,19 @@ case $ACTION in
jattach "list,file=$FILE"
;;
collect)
jattach "start,event=$EVENT,file=$FILE$INTERVAL$FRAMEBUF$THREADS,$OUTPUT$FORMAT"
while (( DURATION-- > 0 )); do
jattach "start,event=$EVENT,file=$FILE,$OUTPUT$FORMAT$PARAMS"
while [ "$DURATION" -gt 0 ]; do
DURATION=$(( DURATION-1 ))
check_if_terminated
sleep 1
done
jattach "stop,file=$FILE,$OUTPUT$FORMAT"
;;
version)
if [ "$PID" = "" ]; then
java "-agentpath:$PROFILER=version=full" -version 2> /dev/null
else
jattach "version=full,file=$FILE"
fi
;;
esac

181
src/allocTracer.cpp Executable file → Normal file
View File

@@ -14,123 +14,130 @@
* limitations under the License.
*/
#include <sys/mman.h>
#include "allocTracer.h"
#include "os.h"
#include "profiler.h"
#include "stackFrame.h"
#include "vmStructs.h"
// JDK 7-9
Trap AllocTracer::_in_new_tlab("_ZN11AllocTracer33send_allocation_in_new_tlab_event");
Trap AllocTracer::_outside_tlab("_ZN11AllocTracer34send_allocation_outside_tlab_event");
// JDK 10+
Trap AllocTracer::_in_new_tlab2("_ZN11AllocTracer27send_allocation_in_new_tlab");
Trap AllocTracer::_outside_tlab2("_ZN11AllocTracer28send_allocation_outside_tlab");
int AllocTracer::_trap_kind;
Trap AllocTracer::_in_new_tlab;
Trap AllocTracer::_outside_tlab;
u64 AllocTracer::_interval;
volatile u64 AllocTracer::_allocated_bytes;
// Resolve the address of the intercepted function
bool Trap::resolve(NativeCodeCache* libjvm) {
if (_entry != NULL) {
return true;
}
_entry = (instruction_t*)libjvm->findSymbol(_func_name);
if (_entry != NULL) {
// Make the entry point writable, so we can rewrite instructions
uintptr_t page_start = (uintptr_t)_entry & ~PAGE_MASK;
mprotect((void*)page_start, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
return true;
}
return false;
}
// Insert breakpoint at the very first instruction
void Trap::install() {
if (_entry != NULL) {
_saved_insn = *_entry;
*_entry = BREAKPOINT;
flushCache(_entry);
}
}
// Clear breakpoint - restore the original instruction
void Trap::uninstall() {
if (_entry != NULL) {
*_entry = _saved_insn;
flushCache(_entry);
}
}
void AllocTracer::installSignalHandler() {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_handler = NULL;
sa.sa_sigaction = signalHandler;
sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigaction(SIGTRAP, &sa, NULL);
}
// Called whenever our breakpoint trap is hit
void AllocTracer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
StackFrame frame(ucontext);
int event_type;
uintptr_t total_size;
uintptr_t instance_size;
// PC points either to BREAKPOINT instruction or to the next one
if (frame.pc() - (uintptr_t)_in_new_tlab._entry <= sizeof(instruction_t)) {
// send_allocation_in_new_tlab_event(KlassHandle klass, size_t tlab_size, size_t alloc_size)
recordAllocation(ucontext, frame.arg0(), frame.arg1(), false);
} else if (frame.pc() - (uintptr_t)_outside_tlab._entry <= sizeof(instruction_t)) {
// send_allocation_outside_tlab_event(KlassHandle klass, size_t alloc_size);
recordAllocation(ucontext, frame.arg0(), frame.arg1(), true);
} else if (frame.pc() - (uintptr_t)_in_new_tlab2._entry <= sizeof(instruction_t)) {
if (_in_new_tlab.covers(frame.pc())) {
// send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, Thread* thread)
recordAllocation(ucontext, frame.arg0(), frame.arg2(), false);
} else if (frame.pc() - (uintptr_t)_outside_tlab2._entry <= sizeof(instruction_t)) {
// send_allocation_in_new_tlab_event(KlassHandle klass, size_t tlab_size, size_t alloc_size)
event_type = BCI_ALLOC;
total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
instance_size = _trap_kind == 1 ? frame.arg3() : frame.arg2();
} else if (_outside_tlab.covers(frame.pc())) {
// send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, Thread* thread)
recordAllocation(ucontext, frame.arg0(), frame.arg2(), true);
// send_allocation_outside_tlab_event(KlassHandle klass, size_t alloc_size);
event_type = BCI_ALLOC_OUTSIDE_TLAB;
total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
instance_size = 0;
} else {
// Not our trap; nothing to do
// Not our trap
return;
}
// Leave the trapped function by simulating "ret" instruction
uintptr_t klass = frame.arg0();
frame.ret();
if (_enabled) {
// TODO: _enabled also uses traps
recordAllocation(ucontext, event_type, klass, total_size, instance_size);
}
}
void AllocTracer::recordAllocation(void* ucontext, uintptr_t rklass, uintptr_t rsize, bool outside_tlab) {
VMSymbol* symbol = VMKlass::fromHandle(rklass)->name();
if (outside_tlab) {
// Invert the last bit to distinguish jmethodID from the allocation in new TLAB
Profiler::_instance.recordSample(ucontext, rsize, BCI_SYMBOL_OUTSIDE_TLAB, (jmethodID)((uintptr_t)symbol ^ 1));
void AllocTracer::recordAllocation(void* ucontext, int event_type, uintptr_t rklass,
uintptr_t total_size, uintptr_t instance_size) {
if (_interval) {
// Do not record allocation unless allocated at least _interval bytes
while (true) {
u64 prev = _allocated_bytes;
u64 next = prev + total_size;
if (next < _interval) {
if (__sync_bool_compare_and_swap(&_allocated_bytes, prev, next)) {
return;
}
} else {
if (__sync_bool_compare_and_swap(&_allocated_bytes, prev, next % _interval)) {
break;
}
}
}
}
AllocEvent event;
event._class_id = 0;
event._total_size = total_size;
event._instance_size = instance_size;
if (VMStructs::hasClassNames()) {
VMSymbol* symbol = VMKlass::fromHandle(rklass)->name();
event._class_id = Profiler::_instance.classMap()->lookup(symbol->body(), symbol->length());
}
Profiler::_instance.recordSample(ucontext, total_size, event_type, &event);
}
Error AllocTracer::check(Arguments& args) {
if (_in_new_tlab.entry() != 0 && _outside_tlab.entry() != 0) {
return Error::OK;
}
NativeCodeCache* libjvm = VMStructs::libjvm();
const void* ne;
const void* oe;
if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer27send_allocation_in_new_tlab")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer28send_allocation_outside_tlab")) != NULL) {
_trap_kind = 1; // JDK 10+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_eventE11KlassHandleP8HeapWord")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_eventE11KlassHandleP8HeapWord")) != NULL) {
_trap_kind = 1; // JDK 8u262+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_event")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_event")) != NULL) {
_trap_kind = 2; // JDK 7-9
} else {
Profiler::_instance.recordSample(ucontext, rsize, BCI_SYMBOL, (jmethodID)symbol);
}
}
Error AllocTracer::start(const char* event, long interval) {
NativeCodeCache* libjvm = Profiler::_instance.jvmLibrary();
if (libjvm == NULL) {
return Error("libjvm not found among loaded libraries");
}
if (!VMStructs::init(libjvm)) {
return Error("VMStructs unavailable. Unsupported JVM?");
}
if (!(_in_new_tlab.resolve(libjvm) || _in_new_tlab2.resolve(libjvm)) ||
!(_outside_tlab.resolve(libjvm) || _outside_tlab2.resolve(libjvm))) {
return Error("No AllocTracer symbols found. Are JDK debug symbols installed?");
}
installSignalHandler();
if (!_in_new_tlab.assign(ne) || !_outside_tlab.assign(oe)) {
return Error("Unable to install allocation trap");
}
return Error::OK;
}
Error AllocTracer::start(Arguments& args) {
Error error = check(args);
if (error) {
return error;
}
_interval = args._interval;
_allocated_bytes = 0;
OS::installSignalHandler(SIGTRAP, signalHandler);
_in_new_tlab.install();
_outside_tlab.install();
_in_new_tlab2.install();
_outside_tlab2.install();
return Error::OK;
}
@@ -138,6 +145,4 @@ Error AllocTracer::start(const char* event, long interval) {
void AllocTracer::stop() {
_in_new_tlab.uninstall();
_outside_tlab.uninstall();
_in_new_tlab2.uninstall();
_outside_tlab2.uninstall();
}

46
src/allocTracer.h Executable file → Normal file
View File

@@ -19,49 +19,39 @@
#include <signal.h>
#include <stdint.h>
#include "arch.h"
#include "codeCache.h"
#include "engine.h"
// Describes OpenJDK function being intercepted
class Trap {
private:
const char* _func_name;
instruction_t* _entry;
instruction_t _saved_insn;
public:
Trap(const char* func_name) : _func_name(func_name), _entry(NULL) {
}
bool resolve(NativeCodeCache* libjvm);
void install();
void uninstall();
friend class AllocTracer;
};
#include "trap.h"
class AllocTracer : public Engine {
private:
// JDK 7-9
static int _trap_kind;
static Trap _in_new_tlab;
static Trap _outside_tlab;
// JDK 10+
static Trap _in_new_tlab2;
static Trap _outside_tlab2;
static void installSignalHandler();
static u64 _interval;
static volatile u64 _allocated_bytes;
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
static void recordAllocation(void* ucontext, uintptr_t rklass, uintptr_t rsize, bool outside_tlab);
static void recordAllocation(void* ucontext, int event_type, uintptr_t rklass,
uintptr_t total_size, uintptr_t instance_size);
public:
const char* name() {
return "alloc";
}
Error start(const char* event, long interval);
const char* units() {
return "bytes";
}
CStack cstack() {
return CSTACK_NO;
}
Error check(Arguments& args);
Error start(Arguments& args);
void stop();
};

View File

@@ -0,0 +1,191 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.profiler;
import java.io.IOException;
/**
* Java API for in-process profiling. Serves as a wrapper around
* async-profiler native library. This class is a singleton.
* The first call to {@link #getInstance()} initiates loading of
* libasyncProfiler.so.
*/
public class AsyncProfiler implements AsyncProfilerMXBean {
    private static AsyncProfiler instance;

    private AsyncProfiler() {
    }

    /**
     * Returns the singleton, loading the native library by its default name
     * ("asyncProfiler") if the singleton has not been created yet.
     *
     * @return The shared profiler instance
     */
    public static AsyncProfiler getInstance() {
        return getInstance(null);
    }

    /**
     * Returns the singleton, creating it on the first call. On creation the
     * native library is loaded with {@link System#load(String)} when a path is
     * given, or with {@link System#loadLibrary(String)} otherwise. Once the
     * instance exists, {@code libPath} is ignored.
     *
     * @param libPath Path of the native library file, or null to load it by name
     * @return The shared profiler instance
     */
    public static synchronized AsyncProfiler getInstance(String libPath) {
        if (instance == null) {
            if (libPath != null) {
                System.load(libPath);
            } else {
                System.loadLibrary("asyncProfiler");
            }
            // Assigned only after the library has loaded without throwing
            instance = new AsyncProfiler();
        }
        return instance;
    }

    /**
     * Begin a new profiling session.
     *
     * @param event    Profiling event, see {@link Events}
     * @param interval Sampling interval, e.g. nanoseconds for Events.CPU
     * @throws IllegalStateException If profiler is already running
     */
    @Override
    public void start(String event, long interval) throws IllegalStateException {
        final boolean reset = true;
        start0(event, interval, reset);
    }

    /**
     * Begin or continue profiling while keeping previously collected data.
     * The event and interval are allowed to differ from the previous session.
     *
     * @param event    Profiling event, see {@link Events}
     * @param interval Sampling interval, e.g. nanoseconds for Events.CPU
     * @throws IllegalStateException If profiler is already running
     */
    @Override
    public void resume(String event, long interval) throws IllegalStateException {
        final boolean reset = false;
        start0(event, interval, reset);
    }

    /**
     * End the profiling session; no results are dumped.
     *
     * @throws IllegalStateException If profiler is not running
     */
    @Override
    public void stop() throws IllegalStateException {
        stop0();
    }

    /**
     * Number of samples collected during the profiling session.
     *
     * @return Number of samples
     */
    @Override
    public native long getSamples();

    /**
     * Profiler agent version, e.g. "1.0". Obtained by running the
     * "version" command.
     *
     * @return Version string
     * @throws IllegalStateException If the command fails with an IOException
     */
    @Override
    public String getVersion() {
        final String command = "version";
        try {
            return execute0(command);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Run an agent-compatible profiling command, i.e. the comma-separated
     * list of arguments described in arguments.cpp.
     *
     * @param command Profiling command
     * @return The command result
     * @throws IllegalArgumentException If failed to parse the command
     * @throws IOException If failed to create output file
     */
    @Override
    public String execute(String command) throws IllegalArgumentException, IOException {
        return execute0(command);
    }

    /**
     * Dump the profile in 'collapsed stacktraces' format.
     *
     * @param counter Which counter to display in the output
     * @return Textual representation of the profile
     * @throws IllegalStateException If the command fails with an IOException
     */
    @Override
    public String dumpCollapsed(Counter counter) {
        final String command = "collapsed,counter=" + counter.name().toLowerCase();
        try {
            return execute0(command);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Dump the flat profile, i.e. the histogram of the hottest methods.
     *
     * @param maxMethods Maximum number of methods to dump. 0 means no limit
     * @return Textual representation of the profile
     * @throws IllegalStateException If the command fails with an IOException
     */
    @Override
    public String dumpFlat(int maxMethods) {
        final String command = "flat=" + maxMethods;
        try {
            return execute0(command);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Include the given thread in the set of profiled threads.
     * 'filter' option must be enabled to use this method.
     *
     * @param thread Thread to include in profiling
     */
    public void addThread(Thread thread) {
        filterThread(thread, true);
    }

    /**
     * Exclude the given thread from the set of profiled threads.
     * 'filter' option must be enabled to use this method.
     *
     * @param thread Thread to exclude from profiling
     */
    public void removeThread(Thread thread) {
        filterThread(thread, false);
    }

    /**
     * Forwards a filter change to the native layer. A null thread is passed
     * through as is; a non-null thread is forwarded only when its state is
     * neither NEW nor TERMINATED.
     */
    private void filterThread(Thread thread, boolean enable) {
        if (thread == null) {
            filterThread0(null, enable);
            return;
        }

        // Hold the thread's monitor so its state cannot change between the check and the native call
        synchronized (thread) {
            Thread.State state = thread.getState();
            boolean notStartedOrFinished = state == Thread.State.NEW || state == Thread.State.TERMINATED;
            if (!notStartedOrFinished) {
                filterThread0(thread, enable);
            }
        }
    }

    private native void start0(String event, long interval, boolean reset) throws IllegalStateException;

    private native void stop0() throws IllegalStateException;

    private native String execute0(String command) throws IllegalArgumentException, IOException;

    private native void filterThread0(Thread thread, boolean enable);
}

View File

@@ -29,13 +29,14 @@ package one.profiler;
*/
public interface AsyncProfilerMXBean {
void start(String event, long interval) throws IllegalStateException;
void resume(String event, long interval) throws IllegalStateException;
void stop() throws IllegalStateException;
long getSamples();
String getVersion();
String execute(String command) throws IllegalArgumentException;
String execute(String command) throws IllegalArgumentException, java.io.IOException;
String dumpCollapsed(Counter counter);
String dumpTraces(int maxTraces);
String dumpFlat(int maxMethods);
}

View File

@@ -0,0 +1,28 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.profiler;
/**
* Predefined event names to use in {@link AsyncProfiler#start(String, long)}
*/
public class Events {
    // Each constant is the literal event name string handed to the profiler.

    // "cpu" event; its sampling interval is given in nanoseconds per the AsyncProfiler#start docs
    public static final String CPU = "cpu";
    // "alloc" event; presumably allocation profiling - confirm against the native engine
    public static final String ALLOC = "alloc";
    // "lock" event; presumably lock profiling - confirm against the native engine
    public static final String LOCK = "lock";
    // "wall" event; presumably wall-clock profiling - TODO confirm
    public static final String WALL = "wall";
    // "itimer" event; presumably interval-timer based sampling - TODO confirm
    public static final String ITIMER = "itimer";
}

35
src/arch.h Executable file → Normal file
View File

@@ -18,12 +18,18 @@
#define _ARCH_H
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
#ifndef PAGE_SIZE
const unsigned long PAGE_SIZE = 4096;
#endif
const unsigned long PAGE_MASK = PAGE_SIZE - 1;
// Atomically adds `increment` (default 1) to the 64-bit counter `var` using the
// __sync_fetch_and_add builtin, and returns the value `var` held before the addition.
static inline u64 atomicInc(volatile u64& var, u64 increment = 1) {
return __sync_fetch_and_add(&var, increment);
}
// int overload: atomically adds `increment` (default 1) to `var` using the
// __sync_fetch_and_add builtin, and returns the value `var` held before the addition.
static inline int atomicInc(volatile int& var, int increment = 1) {
return __sync_fetch_and_add(&var, increment);
}
#if defined(__x86_64__) || defined(__i386__)
@@ -31,6 +37,11 @@ const unsigned long PAGE_MASK = PAGE_SIZE - 1;
typedef unsigned char instruction_t;
const instruction_t BREAKPOINT = 0xcc;
const int SYSCALL_SIZE = 2;
const int PLT_HEADER_SIZE = 16;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 8; // PERF_REG_X86_IP
#define spinPause() asm volatile("pause")
#define rmb() asm volatile("lfence" : : : "memory")
#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r"(addr) : "memory")
@@ -39,6 +50,12 @@ const instruction_t BREAKPOINT = 0xcc;
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xe7f001f0;
const instruction_t BREAKPOINT_THUMB = 0xde01de01;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int PLT_HEADER_SIZE = 20;
const int PLT_ENTRY_SIZE = 12;
const int PERF_REG_PC = 15; // PERF_REG_ARM_PC
#define spinPause() asm volatile("yield")
#define rmb() asm volatile("dmb ish" : : : "memory")
@@ -49,16 +66,18 @@ const instruction_t BREAKPOINT = 0xe7f001f0;
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xd4200000;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC
#define spinPause() asm volatile("yield")
#define rmb() asm volatile("dmb ish" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#else
#warning "Compiling on unsupported arch"
#define spinPause()
#define rmb() __sync_synchronize()
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#error "Compiling on unsupported arch"
#endif

385
src/arguments.cpp Executable file → Normal file
View File

@@ -15,115 +15,346 @@
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <unistd.h>
#include "arguments.h"
// Predefined value that denotes successful operation
const Error Error::OK(NULL);
// Extra buffer space for expanding file pattern
const size_t EXTRA_BUF_SIZE = 512;
// Statically compute hash code of a string containing up to 12 [a-z] letters
#define HASH(s) (HASH12(s " "))
#define HASH12(s) (s[0] & 31LL) | (s[1] & 31LL) << 5 | (s[2] & 31LL) << 10 | (s[3] & 31LL) << 15 | \
(s[4] & 31LL) << 20 | (s[5] & 31LL) << 25 | (s[6] & 31LL) << 30 | (s[7] & 31LL) << 35 | \
(s[8] & 31LL) << 40 | (s[9] & 31LL) << 45 | (s[10] & 31LL) << 50 | (s[11] & 31LL) << 55
// Simulate switch statement over string hashes
#define SWITCH(arg) long long arg_hash = hash(arg); if (0)
#define CASE(s) } else if (arg_hash == HASH(s)) {
#define CASE2(s1, s2) } else if (arg_hash == HASH(s1) || arg_hash == HASH(s2)) {
// Parses agent arguments.
// The format of the string is:
// arg[,arg...]
// where arg is one of the following options:
// start - start profiling
// stop - stop profiling
// status - print profiling status (inactive / running for X seconds)
// list - show the list of available profiling events
// event=EVENT - which event to trace (cpu, alloc, lock, cache-misses etc.)
// collapsed[=C] - dump collapsed stacks (the format used by FlameGraph script)
// C is counter type: 'samples' or 'total'
// svg[=C] - produce Flame Graph in SVG format
// C is counter type: 'samples' or 'total'
// summary - dump profiling summary (number of collected samples of each type)
// traces[=N] - dump top N call traces
// flat[=N] - dump top N methods (aka flat profile)
// interval=N - sampling interval in ns (default: 1'000'000, i.e. 1 ms)
// framebuf=N - size of the buffer for stack frames (default: 1'000'000)
// threads - profile different threads separately
// simple - simple class names instead of FQN
// title=TITLE - FlameGraph title
// width=PX - FlameGraph image width
// height=PX - FlameGraph frame height
// minwidth=PX - FlameGraph minimum frame width
// reverse - generate stack-reversed FlameGraph
// file=FILENAME - output file name for dumping
// start - start profiling
// resume - start or resume profiling without resetting collected data
// stop - stop profiling
// check - check if the specified profiling event is available
// status - print profiling status (inactive / running for X seconds)
// list - show the list of available profiling events
// version[=full] - display the agent version
// event=EVENT - which event to trace (cpu, alloc, lock, cache-misses etc.)
// collapsed[=C] - dump collapsed stacks (the format used by FlameGraph script)
// html[=C] - produce Flame Graph in HTML format
// tree[=C] - produce call tree in HTML format
// C is counter type: 'samples' or 'total'
// jfr - dump events in Java Flight Recorder format
// flat[=N] - dump top N methods (aka flat profile)
// interval=N - sampling interval in ns (default: 10'000'000, i.e. 10 ms)
// jstackdepth=N - maximum Java stack depth (default: 2048)
// safemode=BITS - disable stack recovery techniques (default: 0, i.e. everything enabled)
// file=FILENAME - output file name for dumping
// filter=FILTER - thread filter
// threads - profile different threads separately
// cstack=MODE - how to collect C stack frames in addition to Java stack
// MODE is 'fp' (Frame Pointer), 'lbr' (Last Branch Record) or 'no'
// allkernel - include only kernel-mode events
// alluser - include only user-mode events
// simple - simple class names instead of FQN
// dot - dotted class names
// sig - print method signatures
// ann - annotate Java method names
// include=PATTERN - include stack traces containing PATTERN
// exclude=PATTERN - exclude stack traces containing PATTERN
// begin=FUNCTION - begin profiling when FUNCTION is executed
// end=FUNCTION - end profiling when FUNCTION is executed
// title=TITLE - FlameGraph title
// minwidth=PCT - FlameGraph minimum frame width in percent
// reverse - generate stack-reversed FlameGraph / Call tree
//
// It is possible to specify multiple dump options at the same time
Error Arguments::parse(const char* args) {
if (args == NULL) {
return Error::OK;
} else if (strlen(args) >= sizeof(_buf)) {
return Error("Argument list too long");
}
size_t len = strlen(args);
free(_buf);
_buf = (char*)malloc(len + EXTRA_BUF_SIZE);
if (_buf == NULL) {
return Error("Not enough memory to parse arguments");
}
strcpy(_buf, args);
for (char* arg = strtok(_buf, ","); arg != NULL; arg = strtok(NULL, ",")) {
char* value = strchr(arg, '=');
if (value != NULL) *value++ = 0;
if (strcmp(arg, "start") == 0) {
_action = ACTION_START;
} else if (strcmp(arg, "stop") == 0) {
_action = ACTION_STOP;
} else if (strcmp(arg, "status") == 0) {
_action = ACTION_STATUS;
} else if (strcmp(arg, "list") == 0) {
_action = ACTION_LIST;
} else if (strcmp(arg, "event") == 0) {
if (value == NULL || value[0] == 0) {
return Error("event must not be empty");
}
_event = value;
} else if (strcmp(arg, "collapsed") == 0 || strcmp(arg, "folded") == 0) {
_dump_collapsed = true;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
} else if (strcmp(arg, "flamegraph") == 0 || strcmp(arg, "svg") == 0) {
_dump_flamegraph = true;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
} else if (strcmp(arg, "summary") == 0) {
_dump_summary = true;
} else if (strcmp(arg, "traces") == 0) {
_dump_traces = value == NULL ? INT_MAX : atoi(value);
} else if (strcmp(arg, "flat") == 0) {
_dump_flat = value == NULL ? INT_MAX : atoi(value);
} else if (strcmp(arg, "interval") == 0) {
if (value == NULL || (_interval = atol(value)) <= 0) {
return Error("interval must be > 0");
}
} else if (strcmp(arg, "framebuf") == 0) {
if (value == NULL || (_framebuf = atoi(value)) <= 0) {
return Error("framebuf must be > 0");
}
} else if (strcmp(arg, "threads") == 0) {
_threads = true;
} else if (strcmp(arg, "simple") == 0) {
_simple = true;
} else if (strcmp(arg, "title") == 0 && value != NULL) {
_title = value;
} else if (strcmp(arg, "width") == 0 && value != NULL) {
_width = atoi(value);
} else if (strcmp(arg, "height") == 0 && value != NULL) {
_height = atoi(value);
} else if (strcmp(arg, "minwidth") == 0 && value != NULL) {
_minwidth = atof(value);
} else if (strcmp(arg, "reverse") == 0) {
_reverse = true;
} else if (strcmp(arg, "file") == 0) {
if (value == NULL || value[0] == 0) {
return Error("file must not be empty");
}
_file = value;
SWITCH (arg) {
// Actions
CASE("start")
_action = ACTION_START;
CASE("resume")
_action = ACTION_RESUME;
CASE("stop")
_action = ACTION_STOP;
CASE("check")
_action = ACTION_CHECK;
CASE("status")
_action = ACTION_STATUS;
CASE("list")
_action = ACTION_LIST;
CASE("version")
_action = value == NULL ? ACTION_VERSION : ACTION_FULL_VERSION;
// Output formats
CASE2("collapsed", "folded")
_output = OUTPUT_COLLAPSED;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
CASE2("flamegraph", "html")
_output = OUTPUT_FLAMEGRAPH;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
CASE("tree")
_output = OUTPUT_TREE;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
CASE("jfr")
_output = OUTPUT_JFR;
CASE("flat")
_output = OUTPUT_FLAT;
_dump_flat = value == NULL ? INT_MAX : atoi(value);
// Basic options
CASE("event")
if (value == NULL || value[0] == 0) {
return Error("event must not be empty");
}
if (!addEvent(value)) {
return Error("multiple incompatible events");
}
CASE("interval")
if (value == NULL || (_interval = parseUnits(value)) <= 0) {
return Error("Invalid interval");
}
CASE("jstackdepth")
if (value == NULL || (_jstackdepth = atoi(value)) <= 0) {
return Error("jstackdepth must be > 0");
}
CASE("safemode")
_safe_mode = value == NULL ? INT_MAX : atoi(value);
CASE("file")
if (value == NULL || value[0] == 0) {
return Error("file must not be empty");
}
_file = value;
// Filters
CASE("filter")
_filter = value == NULL ? "" : value;
CASE("include")
if (value != NULL) appendToEmbeddedList(_include, value);
CASE("exclude")
if (value != NULL) appendToEmbeddedList(_exclude, value);
CASE("threads")
_threads = true;
CASE("allkernel")
_ring = RING_KERNEL;
CASE("alluser")
_ring = RING_USER;
CASE("cstack")
if (value != NULL) {
if (value[0] == 'n') {
_cstack = CSTACK_NO;
} else if (value[0] == 'l') {
_cstack = CSTACK_LBR;
} else {
_cstack = CSTACK_FP;
}
}
// Output style modifiers
CASE("simple")
_style |= STYLE_SIMPLE;
CASE("dot")
_style |= STYLE_DOTTED;
CASE("sig")
_style |= STYLE_SIGNATURES;
CASE("ann")
_style |= STYLE_ANNOTATE;
CASE("begin")
_begin = value;
CASE("end")
_end = value;
// FlameGraph options
CASE("title")
if (value != NULL) _title = value;
CASE("minwidth")
if (value != NULL) _minwidth = atof(value);
CASE("reverse")
_reverse = true;
}
}
if (dumpRequested() && (_action == ACTION_NONE || _action == ACTION_STOP)) {
if (_file != NULL && strchr(_file, '%') != NULL) {
_file = expandFilePattern(_buf + len + 1, EXTRA_BUF_SIZE - 1, _file);
}
if (_file != NULL && _output == OUTPUT_NONE) {
_output = detectOutputFormat(_file);
_dump_flat = 200;
}
if (_output != OUTPUT_NONE && (_action == ACTION_NONE || _action == ACTION_STOP)) {
_action = ACTION_DUMP;
}
return Error::OK;
}
// Registers one more requested event.
// "alloc" and "lock" each set their own bit in _events and can be combined freely.
// Any other name is treated as the CPU-kind event and is remembered in _event_desc.
// Returns false if a CPU-kind event has already been registered, true otherwise.
bool Arguments::addEvent(const char* event) {
    if (strcmp(event, EVENT_ALLOC) == 0) {
        _events |= EK_ALLOC;
        return true;
    }

    if (strcmp(event, EVENT_LOCK) == 0) {
        _events |= EK_LOCK;
        return true;
    }

    // Only one event of this kind is accepted
    if ((_events & EK_CPU) != 0) {
        return false;
    }

    _events |= EK_CPU;
    _event_desc = event;
    return true;
}
// The linked list of string offsets is embedded right into _buf array.
// 'list' holds the offset (relative to _buf) of the most recently added string.
// The previous value of 'list' is stored in the sizeof(int) bytes immediately
// preceding 'value', so those bytes of _buf are overwritten.
// NOTE(review): this assumes 'value' points inside _buf with at least sizeof(int)
// bytes before it that are no longer needed (presumably the option name and '='),
// and that an int store at that address is acceptable on the target - confirm.
void Arguments::appendToEmbeddedList(int& list, char* value) {
// Link the new node to the current head
((int*)value)[-1] = list;
// The new string becomes the head of the list
list = (int)(value - _buf);
}
// Packs the low 5 bits of every character of arg into one value,
// 5 bits per character, the first character in the lowest bits.
// Should match statically computed HASH(arg)
long long Arguments::hash(const char* arg) {
    long long result = 0;
    int shift = 0;
    while (*arg != 0) {
        long long bits = *arg & 31LL;
        result |= bits << shift;
        arg++;
        shift += 5;
    }
    return result;
}
// Expands %p to the process id
//         %t to the timestamp (yyyyMMdd-HHmmss, local time)
// Any other character that follows '%' is copied as is, without the '%' itself;
// a single '%' at the very end of the pattern is dropped.
//
// dest     - output buffer
// max_size - size of the output buffer in bytes
// pattern  - zero-terminated pattern to expand
//
// The output is silently truncated if it does not fit, and is always
// zero-terminated (unless max_size is 0, in which case dest is not touched).
// Returns dest.
const char* Arguments::expandFilePattern(char* dest, size_t max_size, const char* pattern) {
    if (max_size == 0) {
        // No room even for the terminating zero
        return dest;
    }

    char* ptr = dest;
    char* end = dest + max_size - 1;

    while (ptr < end && *pattern != 0) {
        char c = *pattern++;
        if (c == '%') {
            c = *pattern++;
            if (c == 0) {
                break;
            } else if (c == 'p' || c == 't') {
                // ptr < end is guaranteed by the loop condition, so avail >= 1
                size_t avail = end - ptr;
                int len;
                if (c == 'p') {
                    len = snprintf(ptr, avail, "%d", getpid());
                } else {
                    time_t timestamp = time(NULL);
                    struct tm t;
                    localtime_r(&timestamp, &t);
                    len = snprintf(ptr, avail, "%d%02d%02d-%02d%02d%02d",
                                   t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
                                   t.tm_hour, t.tm_min, t.tm_sec);
                }
                // snprintf returns the length the complete output would have had.
                // When the output was truncated, only avail - 1 characters were stored,
                // so advancing by the return value would move ptr past the buffer.
                // A negative return value (error) must not move ptr backwards.
                if (len > 0) {
                    ptr += (size_t)len < avail ? (size_t)len : avail - 1;
                }
                continue;
            }
        }
        *ptr++ = c;
    }

    // ptr never exceeds end here, so the terminator stays inside the buffer
    *ptr = 0;
    return dest;
}
// Chooses the output format by the file name extension,
// i.e. by the part of the name that starts at the last '.':
//   .html                -> OUTPUT_FLAMEGRAPH
//   .jfr                 -> OUTPUT_JFR
//   .collapsed, .folded  -> OUTPUT_COLLAPSED
// Everything else, including a name without a dot, gives OUTPUT_FLAT.
Output Arguments::detectOutputFormat(const char* file) {
    const char* ext = strrchr(file, '.');
    if (ext == NULL) {
        return OUTPUT_FLAT;
    }

    if (strcmp(ext, ".html") == 0) {
        return OUTPUT_FLAMEGRAPH;
    }
    if (strcmp(ext, ".jfr") == 0) {
        return OUTPUT_JFR;
    }
    if (strcmp(ext, ".collapsed") == 0 || strcmp(ext, ".folded") == 0) {
        return OUTPUT_COLLAPSED;
    }
    return OUTPUT_FLAT;
}
// Parses a number with an optional one-letter unit suffix.
// The number is read by strtol with base 0; only the first character
// after the number is examined:
//   (none)      -> the number itself
//   K, k, U, u  -> x 1000        (U = microseconds)
//   M, m        -> x 1000000     (million, megabytes or milliseconds)
//   G, g, S, s  -> x 1000000000  (S = seconds)
// Returns -1 for any other suffix.
long Arguments::parseUnits(const char* str) {
    char* suffix;
    const long number = strtol(str, &suffix, 0);
    const char unit = *suffix;

    if (unit == 0) {
        return number;
    }
    if (unit == 'K' || unit == 'k' || unit == 'U' || unit == 'u') {
        return number * 1000;
    }
    if (unit == 'M' || unit == 'm') {
        return number * 1000000;
    }
    if (unit == 'G' || unit == 'g' || unit == 'S' || unit == 's') {
        return number * 1000000000;
    }
    return -1;
}
// Releases the argument buffer, unless this object has been marked as shared
Arguments::~Arguments() {
    if (_shared) {
        return;
    }
    free(_buf);
}
// Replaces the contents of this object with the contents of 'other'.
// The statement order matters:
//  1. the buffer currently held by this object is released (unless it is shared);
//  2. all fields, including the _buf pointer and the _shared flag, are assigned from 'other'
//     (presumably a member-wise copy; no custom operator= is visible here);
//  3. 'other' is marked as shared, so that its destructor will not free
//     the buffer this object now points to.
void Arguments::save(Arguments& other) {
if (!_shared) free(_buf);
*this = other;
other._shared = true;
}

126
src/arguments.h Executable file → Normal file
View File

@@ -20,19 +20,26 @@
#include <stddef.h>
const long DEFAULT_INTERVAL = 1000000; // 1 ms
const int DEFAULT_FRAMEBUF = 1000000;
const long DEFAULT_INTERVAL = 10000000; // 10 ms
const int DEFAULT_JSTACKDEPTH = 2048;
const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_LOCK = "lock";
const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_LOCK = "lock";
const char* const EVENT_WALL = "wall";
const char* const EVENT_ITIMER = "itimer";
const char* const EVENT_JSTACK = "jstack";
// What the agent is asked to do; selected by Arguments::parse()
enum Action {
ACTION_NONE,          // no action argument given
ACTION_START,         // "start"
ACTION_RESUME,        // "resume"
ACTION_STOP,          // "stop"
ACTION_CHECK,         // "check"
ACTION_STATUS,        // "status"
ACTION_LIST,          // "list"
ACTION_VERSION,       // "version" without a value
ACTION_FULL_VERSION,  // "version=..." with a value
ACTION_DUMP           // chosen by parse() when an output format is set and the action is NONE or STOP
};
@@ -41,6 +48,41 @@ enum Counter {
COUNTER_TOTAL
};
// Selected by the "allkernel" / "alluser" arguments
enum Ring {
RING_ANY,     // neither argument given
RING_KERNEL,  // "allkernel"
RING_USER     // "alluser"
};
// Bit flags; Arguments::addEvent() combines them in Arguments::_events
enum EventKind {
EK_CPU = 1,    // any event other than "alloc" and "lock"
EK_ALLOC = 2,  // "alloc"
EK_LOCK = 4    // "lock"
};
// Bit flags for output style modifiers; combined in Arguments::_style
enum Style {
STYLE_SIMPLE = 1,      // "simple"
STYLE_DOTTED = 2,      // "dot"
STYLE_SIGNATURES = 4,  // "sig"
STYLE_ANNOTATE = 8     // "ann"
};
// Value of the "cstack=MODE" argument
enum CStack {
CSTACK_DEFAULT,  // argument not specified
CSTACK_NO,       // MODE starts with 'n'
CSTACK_FP,       // any other MODE
CSTACK_LBR       // MODE starts with 'l'
};
// Output format; set by an explicit argument or detected from the file extension
enum Output {
OUTPUT_NONE,        // not specified
OUTPUT_FLAT,        // "flat"; also the fallback for an unrecognized file extension
OUTPUT_COLLAPSED,   // "collapsed" / "folded"
OUTPUT_FLAMEGRAPH,  // "flamegraph" / "html"
OUTPUT_TREE,        // "tree"
OUTPUT_JFR          // "jfr"
};
class Error {
private:
@@ -64,55 +106,77 @@ class Error {
class Arguments {
private:
char _buf[1024];
char* _buf;
bool _shared;
void appendToEmbeddedList(int& list, char* value);
static long long hash(const char* arg);
static const char* expandFilePattern(char* dest, size_t max_size, const char* pattern);
static Output detectOutputFormat(const char* file);
static long parseUnits(const char* str);
public:
Action _action;
Counter _counter;
const char* _event;
Ring _ring;
int _events;
const char* _event_desc;
long _interval;
int _framebuf;
int _jstackdepth;
int _safe_mode;
const char* _file;
const char* _filter;
int _include;
int _exclude;
bool _threads;
bool _simple;
char* _file;
bool _dump_collapsed;
bool _dump_flamegraph;
bool _dump_summary;
int _dump_traces;
int _style;
CStack _cstack;
Output _output;
int _dump_flat;
const char* _begin;
const char* _end;
// FlameGraph parameters
const char* _title;
int _width;
int _height;
double _minwidth;
bool _reverse;
Arguments() :
_buf(NULL),
_shared(false),
_action(ACTION_NONE),
_counter(COUNTER_SAMPLES),
_event(EVENT_CPU),
_ring(RING_ANY),
_events(0),
_event_desc(NULL),
_interval(0),
_framebuf(DEFAULT_FRAMEBUF),
_threads(false),
_simple(false),
_jstackdepth(DEFAULT_JSTACKDEPTH),
_safe_mode(0),
_file(NULL),
_dump_collapsed(false),
_dump_flamegraph(false),
_dump_summary(false),
_dump_traces(0),
_dump_flat(0),
_filter(NULL),
_include(0),
_exclude(0),
_threads(false),
_style(0),
_cstack(CSTACK_DEFAULT),
_output(OUTPUT_NONE),
_begin(NULL),
_end(NULL),
_title("Flame Graph"),
_width(1200),
_height(16),
_minwidth(1),
_minwidth(0),
_reverse(false) {
}
bool dumpRequested() {
return _dump_collapsed || _dump_flamegraph || _dump_summary || _dump_traces > 0 || _dump_flat > 0;
}
~Arguments();
void save(Arguments& other);
Error parse(const char* args);
bool addEvent(const char* event);
friend class FrameName;
friend class Recording;
};
#endif // _ARGUMENTS_H

246
src/callTraceStorage.cpp Normal file
View File

@@ -0,0 +1,246 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string.h>
#include <unistd.h>
#include "callTraceStorage.h"
#include "os.h"
// Number of slots in the first hash table; every next table is twice as large as the previous one
static const u32 INITIAL_CAPACITY = 65536;
// Argument of the LinearAllocator constructor (presumably the allocation chunk size - TODO confirm)
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
// Page size minus one: used as a bit mask for rounding sizes up to a whole number of pages
static const size_t PAGE_ALIGNMENT = sysconf(_SC_PAGESIZE) - 1;
// Header of a fixed-capacity hash table with 64-bit keys.
// The table occupies a single allocation: this header is immediately followed
// by 'capacity' keys (u64), which are followed by 'capacity' CallTraceSample values.
// A key equal to 0 denotes an empty slot. Tables are chained through _prev.
class LongHashTable {
private:
// The table that was current before this one was allocated, or NULL
LongHashTable* _prev;
void* _padding0;
u32 _capacity;
// NOTE(review): the padding places _capacity and _size 64 bytes apart,
// presumably to keep them on different cache lines - confirm
u32 _padding1[15];
// Number of occupied slots; incremented atomically by incSize()
volatile u32 _size;
u32 _padding2[15];
// Size in bytes of a table with the given capacity (header + keys + values),
// rounded up to a multiple of the page size
static size_t getSize(u32 capacity) {
size_t size = sizeof(LongHashTable) + (sizeof(u64) + sizeof(CallTraceSample)) * capacity;
return (size + PAGE_ALIGNMENT) & ~PAGE_ALIGNMENT;
}
public:
// Allocates a table with the given capacity and links it to 'prev'.
// Returns NULL if OS::safeAlloc returns NULL.
// Keys and values are not cleared here.
// NOTE(review): assumes OS::safeAlloc returns zero-filled memory - confirm
static LongHashTable* allocate(LongHashTable* prev, u32 capacity) {
LongHashTable* table = (LongHashTable*)OS::safeAlloc(getSize(capacity));
if (table != NULL) {
table->_prev = prev;
table->_capacity = capacity;
table->_size = 0;
}
return table;
}
// Frees this table and returns the previous table in the chain.
// _prev is read before the memory is released; 'this' must not be used afterwards.
LongHashTable* destroy() {
LongHashTable* prev = _prev;
OS::safeFree(this, getSize(_capacity));
return prev;
}
LongHashTable* prev() {
return _prev;
}
u32 capacity() {
return _capacity;
}
u32 size() {
return _size;
}
// Atomically increments the number of occupied slots and returns the new value
u32 incSize() {
return __sync_add_and_fetch(&_size, 1);
}
// The array of keys starts right after the header
u64* keys() {
return (u64*)(this + 1);
}
// The array of values starts right after the last key
CallTraceSample* values() {
return (CallTraceSample*)(keys() + _capacity);
}
// Zeroes all keys and values and resets the size; the capacity and the link to _prev are kept
void clear() {
memset(keys(), 0, (sizeof(u64) + sizeof(CallTraceSample)) * _capacity);
_size = 0;
}
};
// Creates the storage with one table of INITIAL_CAPACITY slots.
// _current_table is NULL if the table could not be allocated.
CallTraceStorage::CallTraceStorage()
    : _allocator(CALL_TRACE_CHUNK),
      _current_table(LongHashTable::allocate(NULL, INITIAL_CAPACITY)) {
}
// Frees the whole chain of tables, from the newest to the oldest
CallTraceStorage::~CallTraceStorage() {
    for (LongHashTable* table = _current_table; table != NULL; ) {
        // destroy() releases the table and hands back the previous one
        table = table->destroy();
        _current_table = table;
    }
}
void CallTraceStorage::clear() {
while (_current_table->prev() != NULL) {
_current_table = _current_table->destroy();
}
_current_table->clear();
_allocator.clear();
}
// Fills the map with all stored call traces from all tables.
// The map key is computed the same way as the value returned by put()
void CallTraceStorage::collectTraces(std::map<u32, CallTrace*>& map) {
    LongHashTable* table = _current_table;
    while (table != NULL) {
        const u32 capacity = table->capacity();
        u64* keys = table->keys();
        CallTraceSample* values = table->values();

        // Id of the trace in slot 0 of this table
        const u32 first_id = capacity - (INITIAL_CAPACITY - 1);

        for (u32 slot = 0; slot < capacity; slot++) {
            if (keys[slot] == 0) {
                continue;  // empty slot
            }
            map[first_id + slot] = values[slot].trace;
        }

        table = table->prev();
    }
}
// Appends pointers to the samples of all occupied slots of all tables, newest table first
void CallTraceStorage::collectSamples(std::vector<CallTraceSample*>& samples) {
    LongHashTable* table = _current_table;
    while (table != NULL) {
        const u32 capacity = table->capacity();
        u64* keys = table->keys();
        CallTraceSample* values = table->values();

        for (u32 slot = 0; slot < capacity; slot++) {
            if (keys[slot] == 0) {
                continue;  // empty slot
            }
            samples.push_back(values + slot);
        }

        table = table->prev();
    }
}
// Adaptation of MurmurHash64A by Austin Appleby.
// Hashes the raw bytes of the frames array: 8 bytes at a time,
// plus the trailing 4 bytes if the length in bytes has bit 2 set
u64 CallTraceStorage::calcHash(int num_frames, ASGCT_CallFrame* frames) {
    const u64 M = 0xc6a4a7935bd1e995ULL;
    const int R = 47;

    int len = num_frames * sizeof(ASGCT_CallFrame);
    const u64* word = (const u64*)frames;
    const u64* const last = word + len / 8;

    u64 hash = len * M;

    for (; word != last; word++) {
        u64 mixed = *word;
        mixed *= M;
        mixed ^= mixed >> R;
        mixed *= M;

        hash = (hash ^ mixed) * M;
    }

    if (len & 4) {
        hash = (hash ^ *(const u32*)word) * M;
    }

    // Final mix
    hash ^= hash >> R;
    hash *= M;
    hash ^= hash >> R;

    return hash;
}
// Copies the given frames into a new CallTrace obtained from _allocator.
// Returns NULL if the allocator returns NULL.
CallTrace* CallTraceStorage::storeCallTrace(int num_frames, ASGCT_CallFrame* frames) {
    // CallTrace already includes room for one frame, which is not part of the header
    const size_t header_size = sizeof(CallTrace) - sizeof(ASGCT_CallFrame);
    const size_t total_size = header_size + num_frames * sizeof(ASGCT_CallFrame);

    CallTrace* trace = (CallTrace*)_allocator.alloc(total_size);
    if (trace == NULL) {
        return NULL;
    }

    trace->num_frames = num_frames;
    // Do not use memcpy inside signal handler
    ASGCT_CallFrame* dst = trace->frames;
    for (int i = 0; i < num_frames; i++) {
        dst[i] = frames[i];
    }
    return trace;
}
// Looks up the call trace stored under the given hash in one table.
// Uses the same probe sequence as put(). Returns NULL when an empty slot
// is reached or when 'capacity' probes did not find the hash.
CallTrace* CallTraceStorage::findCallTrace(LongHashTable* table, u64 hash) {
    u64* keys = table->keys();
    const u32 capacity = table->capacity();
    const u32 mask = capacity - 1;

    u32 slot = hash & mask;
    for (u32 step = 0; keys[slot] != hash; slot = (slot + step) & mask) {
        if (keys[slot] == 0 || ++step >= capacity) {
            return NULL;
        }
    }

    return table->values()[slot].trace;
}
// Records one sample of the given call trace in the current table.
// 'counter' is added to the slot's counter, and the slot's number of samples is incremented.
// Returns the id of the call trace (computed from the table capacity and the slot index),
// or 0 if no slot was found after 'capacity' probes.
// Slots are claimed with compare-and-swap and no lock is taken in this function.
// NOTE(review): the hash itself serves as the key and 0 denotes an empty slot;
// a call trace hashing to 0, or two different traces with equal hashes,
// would not be distinguished here - confirm this is acceptable.
u32 CallTraceStorage::put(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
u64 hash = calcHash(num_frames, frames);
LongHashTable* table = _current_table;
u64* keys = table->keys();
u32 capacity = table->capacity();
// capacity is INITIAL_CAPACITY doubled zero or more times, so (capacity - 1) is a bit mask
u32 slot = hash & (capacity - 1);
u32 step = 0;
while (keys[slot] != hash) {
if (keys[slot] == 0) {
// Try to claim the empty slot. If the CAS fails, the slot has just been taken:
// 'continue' re-evaluates the loop condition for the same slot,
// since it may now contain this very hash.
if (!__sync_bool_compare_and_swap(&keys[slot], 0, hash)) {
continue;
}
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table
if (table->incSize() == capacity * 3 / 4) {
LongHashTable* new_table = LongHashTable::allocate(table, capacity * 2);
if (new_table != NULL) {
// The result of the CAS is ignored.
// NOTE(review): if the CAS fails, new_table is not freed here - confirm this cannot happen
__sync_bool_compare_and_swap(&_current_table, table, new_table);
}
}
// Migrate from a previous table to save space
CallTrace* trace = table->prev() == NULL ? NULL : findCallTrace(table->prev(), hash);
if (trace == NULL) {
trace = storeCallTrace(num_frames, frames);
}
// The trace pointer is written after the key has been published by the CAS above;
// it stays NULL if storeCallTrace() returned NULL
table->values()[slot].trace = trace;
break;
}
if (++step >= capacity) {
// Very unlikely case of a table overflow
return 0;
}
// Improved version of linear probing
slot = (slot + step) & (capacity - 1);
}
// TODO: check overhead
CallTraceSample& s = table->values()[slot];
atomicInc(s.samples);
atomicInc(s.counter, counter);
// Same formula as the map key in collectTraces()
return capacity - (INITIAL_CAPACITY - 1) + slot;
}

60
src/callTraceStorage.h Normal file
View File

@@ -0,0 +1,60 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _CALLTRACESTORAGE_H
#define _CALLTRACESTORAGE_H
#include <map>
#include <vector>
#include "arch.h"
#include "linearAllocator.h"
#include "vmEntry.h"
class LongHashTable;
// A stored stack trace: the number of frames followed by the frames themselves.
// 'frames' is declared with one element, but CallTraceStorage::storeCallTrace()
// allocates room for num_frames elements.
struct CallTrace {
int num_frames;
ASGCT_CallFrame frames[1];
};
// Value part of a hash table slot
struct CallTraceSample {
CallTrace* trace;  // stored call trace; may be NULL if it could not be allocated
u64 samples;       // incremented by one on every CallTraceStorage::put()
u64 counter;       // sum of the 'counter' arguments passed to CallTraceStorage::put()
};
// Storage of unique call traces with per-trace sample statistics.
// Call traces are kept in a chain of hash tables keyed by the hash of the frames.
class CallTraceStorage {
private:
// Memory for CallTrace objects
LinearAllocator _allocator;
// The newest table; older tables are reachable through LongHashTable::prev()
LongHashTable* _current_table;
u64 calcHash(int num_frames, ASGCT_CallFrame* frames);
CallTrace* storeCallTrace(int num_frames, ASGCT_CallFrame* frames);
CallTrace* findCallTrace(LongHashTable* table, u64 hash);
public:
CallTraceStorage();
~CallTraceStorage();
// Removes all collected call traces
void clear();
// Fills the map: call trace id (as returned by put) -> call trace
void collectTraces(std::map<u32, CallTrace*>& map);
// Appends pointers to all non-empty samples
void collectSamples(std::vector<CallTraceSample*>& samples);
// Records a sample of the given call trace; returns the call trace id, or 0 on table overflow
u32 put(int num_frames, ASGCT_CallFrame* frames, u64 counter);
};
#endif // _CALLTRACESTORAGE_H

58
src/codeCache.cpp Executable file → Normal file
View File

@@ -22,21 +22,35 @@
void CodeCache::expand() {
CodeBlob* old_blobs = _blobs;
CodeBlob* new_blobs = new CodeBlob[_capacity * 2];
memcpy(new_blobs, old_blobs, _capacity * sizeof(CodeBlob));
int live = 0;
for (int i = 0; i < _count; i++) {
if (_blobs[i]._method != NULL) {
new_blobs[live++] = _blobs[i];
}
}
_count = live;
_capacity *= 2;
_blobs = new_blobs;
delete[] old_blobs;
}
void CodeCache::add(const void* start, int length, jmethodID method) {
void CodeCache::add(const void* start, int length, jmethodID method, bool update_bounds) {
if (_count >= _capacity) {
expand();
}
const void* end = (const char*)start + length;
_blobs[_count]._start = start;
_blobs[_count]._end = (const char*)start + length;
_blobs[_count]._end = end;
_blobs[_count]._method = method;
_count++;
if (update_bounds) {
if (start < _min_address) _min_address = start;
if (end > _max_address) _max_address = end;
}
}
void CodeCache::remove(const void* start, jmethodID method) {
@@ -50,7 +64,8 @@ void CodeCache::remove(const void* start, jmethodID method) {
jmethodID CodeCache::find(const void* address) {
for (int i = 0; i < _count; i++) {
if (address >= _blobs[i]._start && address < _blobs[i]._end) {
CodeBlob* cb = _blobs + i;
if (address >= cb->_start && address < cb->_end && cb->_method != NULL) {
return _blobs[i]._method;
}
}
@@ -71,8 +86,13 @@ NativeCodeCache::~NativeCodeCache() {
free(_name);
}
void NativeCodeCache::add(const void* start, int length, const char* name) {
CodeCache::add(start, length, (jmethodID)strdup(name));
void NativeCodeCache::add(const void* start, int length, const char* name, bool update_bounds) {
char* name_copy = strdup(name);
// Replace non-printable characters
for (char* s = name_copy; *s != 0; s++) {
if (*s < ' ') *s = '?';
}
CodeCache::add(start, length, (jmethodID)name_copy, update_bounds);
}
void NativeCodeCache::sort() {
@@ -80,8 +100,8 @@ void NativeCodeCache::sort() {
qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator);
if (_min_address == NULL) _min_address = _blobs[0]._start;
if (_max_address == NULL) _max_address = _blobs[_count - 1]._end;
if (_min_address == NO_MIN_ADDRESS) _min_address = _blobs[0]._start;
if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end;
}
const char* NativeCodeCache::binarySearch(const void* address) {
@@ -99,15 +119,29 @@ const char* NativeCodeCache::binarySearch(const void* address) {
}
}
// Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code
if (low > 0 && _blobs[low - 1]._start == _blobs[low - 1]._end) {
// Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code.
// Also, in some cases (endless loop) the return address may point beyond the function.
if (low > 0 && (_blobs[low - 1]._start == _blobs[low - 1]._end || _blobs[low - 1]._end == address)) {
return (const char*)_blobs[low - 1]._method;
}
return _name;
}
const void* NativeCodeCache::findSymbol(const char* prefix) {
int prefix_len = strlen(prefix);
const void* NativeCodeCache::findSymbol(const char* name) {
for (int i = 0; i < _count; i++) {
const char* blob_name = (const char*)_blobs[i]._method;
if (blob_name != NULL && strcmp(blob_name, name) == 0) {
return _blobs[i]._start;
}
}
return NULL;
}
// Convenience overload: uses the whole 'prefix' string as the prefix
const void* NativeCodeCache::findSymbolByPrefix(const char* prefix) {
    int prefix_len = strlen(prefix);
    return findSymbolByPrefix(prefix, prefix_len);
}
const void* NativeCodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) {
for (int i = 0; i < _count; i++) {
const char* blob_name = (const char*)_blobs[i]._method;
if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) {

31
src/codeCache.h Executable file → Normal file
View File

@@ -20,6 +20,9 @@
#include <jvmti.h>
#define NO_MIN_ADDRESS ((const void*)-1)
#define NO_MAX_ADDRESS ((const void*)0)
const int INITIAL_CODE_CACHE_CAPACITY = 1000;
@@ -50,6 +53,8 @@ class CodeCache {
int _capacity;
int _count;
CodeBlob* _blobs;
const void* _min_address;
const void* _max_address;
void expand();
@@ -58,13 +63,19 @@ class CodeCache {
_capacity = INITIAL_CODE_CACHE_CAPACITY;
_count = 0;
_blobs = new CodeBlob[_capacity];
_min_address = NO_MIN_ADDRESS;
_max_address = NO_MAX_ADDRESS;
}
~CodeCache() {
delete[] _blobs;
}
void add(const void* start, int length, jmethodID method);
bool contains(const void* address) {
return address >= _min_address && address < _max_address;
}
void add(const void* start, int length, jmethodID method, bool update_bounds = false);
void remove(const void* start, jmethodID method);
jmethodID find(const void* address);
};
@@ -73,11 +84,11 @@ class CodeCache {
class NativeCodeCache : public CodeCache {
private:
char* _name;
const void* _min_address;
const void* _max_address;
public:
NativeCodeCache(const char* name, const void* min_address = NULL, const void* max_address = NULL);
NativeCodeCache(const char* name,
const void* min_address = NO_MIN_ADDRESS,
const void* max_address = NO_MAX_ADDRESS);
~NativeCodeCache();
@@ -85,14 +96,12 @@ class NativeCodeCache : public CodeCache {
return _name;
}
bool contains(const void* address) {
return address >= _min_address && address < _max_address;
}
void add(const void* start, int length, const char* name);
void add(const void* start, int length, const char* name, bool update_bounds = false);
void sort();
const char* binarySearch(const void* address);
const void* findSymbol(const char* prefix);
const void* findSymbol(const char* name);
const void* findSymbolByPrefix(const char* prefix);
const void* findSymbolByPrefix(const char* prefix, int prefix_len);
};
#endif // _CODECACHE_H

View File

@@ -0,0 +1,442 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.TreeMap;
public class FlameGraph {
public String title = "Flame Graph";
public boolean reverse;
public double minwidth;
public int skip;
public String input;
public String output;
private final Frame root = new Frame();
private int depth;
private long mintotal;
// Parses command line arguments.
// The first argument that is non-empty and does not start with "--" is the input file;
// every later such argument sets the output file. Recognized options are
// --title TITLE, --reverse, --minwidth PERCENT and --skip FRAMES.
// Empty arguments and unknown "--" options are ignored.
public FlameGraph(String... args) {
    int pos = 0;
    while (pos < args.length) {
        String arg = args[pos++];

        if (!arg.isEmpty() && !arg.startsWith("--")) {
            if (input != null) {
                output = arg;
            } else {
                input = arg;
            }
            continue;
        }

        switch (arg) {
            case "--title":
                title = args[pos++];
                break;
            case "--reverse":
                reverse = true;
                break;
            case "--minwidth":
                minwidth = Double.parseDouble(args[pos++]);
                break;
            case "--skip":
                skip = Integer.parseInt(args[pos++]);
                break;
            default:
                break;
        }
    }
}
// Reads collapsed stacks from the input file, decoding it as UTF-8
public void parse() throws IOException {
    FileInputStream stream = new FileInputStream(input);
    Reader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
    parse(reader);
}
// Reads collapsed stacks, one sample per line: frames separated by ';',
// then a space and the number of ticks. Lines where the last space is missing
// or is the first character are skipped. The reader is closed at the end.
public void parse(Reader in) throws IOException {
    try (BufferedReader br = new BufferedReader(in)) {
        String line = br.readLine();
        while (line != null) {
            int space = line.lastIndexOf(' ');
            if (space > 0) {
                String[] trace = line.substring(0, space).split(";");
                long ticks = Long.parseLong(line.substring(space + 1));
                addSample(trace, ticks);
            }
            line = br.readLine();
        }
    }
}
// Adds one stack trace with the given number of ticks to the frame tree.
// The first 'skip' elements of the trace are ignored. In reverse mode
// the remaining elements are walked from the last one to the first one.
public void addSample(String[] trace, long ticks) {
    Frame frame = root;

    if (!reverse) {
        int i = skip;
        while (i < trace.length) {
            frame.total += ticks;
            frame = frame.child(trace[i]);
            i++;
        }
    } else {
        int i = trace.length - 1;
        while (i >= skip) {
            frame.total += ticks;
            frame = frame.child(trace[i]);
            i--;
        }
    }

    // The last frame of the path owns the ticks
    frame.total += ticks;
    frame.self += ticks;

    depth = Math.max(depth, trace.length);
}
// Writes the flame graph to the output file in UTF-8,
// or to the standard output if no output file is set
public void dump() throws IOException {
    if (output == null) {
        dump(System.out);
        return;
    }

    try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(output), 32768);
         PrintStream out = new PrintStream(bos, false, "UTF-8")) {
        dump(out);
    }
}
// Prints the page: the header with substituted variables,
// one f(...) line per frame, and the footer
public void dump(PrintStream out) {
    int levels = depth + 1;
    String header = applyReplacements(HEADER,
            "{title}", title,
            "{height}", levels * 16,
            "{depth}", levels,
            "{reverse}", reverse);
    out.print(header);

    // Frames with fewer total ticks than this are not printed
    mintotal = (long) (root.total * minwidth / 100);
    printFrame(out, "all", root, 0, 0);

    out.print(FOOTER);
}
// Replace ${variables} in the given string with the supplied values.
// 'params' is a flat list of pairs: a name including the braces, e.g. "{title}",
// followed by its value. A variable with no matching name is removed from the output.
private String applyReplacements(String s, Object... params) {
    StringBuilder result = new StringBuilder(s.length() + 256);

    int copied = 0;
    int dollar = s.indexOf('$');
    while (dollar >= 0) {
        // Text between the previous variable and this one
        result.append(s, copied, dollar);

        // The variable spans from '{' to the closing '}' inclusive
        copied = s.indexOf('}', dollar + 2) + 1;
        String var = s.substring(dollar + 1, copied);

        int i = 0;
        while (i < params.length && !var.equals(params[i])) {
            i += 2;
        }
        if (i < params.length) {
            result.append(params[i + 1]);
        }

        dollar = s.indexOf('$', copied);
    }

    result.append(s, copied, s.length());
    return result.toString();
}
// Prints the given frame and, recursively, its children as lines of the form
//   f(level,x,total,type,'title')
//
// out   - destination stream
// title - frame name, possibly with a type suffix like _[j]
// frame - the frame to print
// level - depth of the frame in the tree; the root has level 0
// x     - horizontal offset of the frame in ticks
//
// Children whose total is less than mintotal are not printed,
// but they still occupy their horizontal space.
private void printFrame(PrintStream out, String title, Frame frame, int level, long x) {
    int type = frameType(title);
    title = stripSuffix(title);

    // The title is emitted inside a single-quoted JavaScript string literal.
    // Backslashes are escaped first, so that the backslashes added
    // for the quotes below are not doubled.
    if (title.indexOf('\\') >= 0) {
        title = title.replace("\\", "\\\\");
    }
    if (title.indexOf('\'') >= 0) {
        title = title.replace("'", "\\'");
    }

    out.println("f(" + level + "," + x + "," + frame.total + "," + type + ",'" + title + "')");

    // Children are laid out to the right of the frame's own ticks
    x += frame.self;
    for (Map.Entry<String, Frame> e : frame.entrySet()) {
        Frame child = e.getValue();
        if (child.total >= mintotal) {
            printFrame(out, e.getKey(), child, level + 1, x);
        }
        x += child.total;
    }
}
// Removes a 4-character suffix of the form _[x] (any character in place of x), if present
private String stripSuffix(String title) {
    int cut = title.length() - 4;
    boolean tagged = cut >= 0 && title.endsWith("]") && title.startsWith("_[", cut);
    return tagged ? title.substring(0, cut) : title;
}
// Returns the numeric frame type that is printed as the 4th argument of f(...):
//   0 - title ends with _[j], or contains '/' after the first character,
//       or contains '.' after the first character and starts with an upper case letter
//   1 - title ends with _[i]
//   2 - title ends with _[k]
//   3 - title contains "::" or starts with "-[" or "+["
//   4 - everything else
// The rules are checked in the order of the code below; the first match wins.
private int frameType(String title) {
    if (title.endsWith("_[j]")) {
        return 0;
    }
    if (title.endsWith("_[i]")) {
        return 1;
    }
    if (title.endsWith("_[k]")) {
        return 2;
    }
    if (title.contains("::") || title.startsWith("-[") || title.startsWith("+[")) {
        return 3;
    }
    if (title.indexOf('/') > 0
            || (title.indexOf('.') > 0 && Character.isUpperCase(title.charAt(0)))) {
        return 0;
    }
    return 4;
}
// Command line entry point: converts collapsed stacks to an HTML flame graph.
// Prints the usage and exits with code 1 if no input file is given.
public static void main(String[] args) throws IOException {
    FlameGraph fg = new FlameGraph(args);

    if (fg.input == null) {
        String[] usage = {
                "Usage: java " + FlameGraph.class.getName() + " [options] input.collapsed [output.html]",
                "",
                "Options:",
                "  --title TITLE",
                "  --reverse",
                "  --minwidth PERCENT",
                "  --skip FRAMES"
        };
        for (String line : usage) {
            System.out.println(line);
        }
        System.exit(1);
    }

    fg.parse();
    fg.dump();
}
// A node of the frame tree. The map itself holds the children, sorted by title.
static class Frame extends TreeMap<String, Frame> {
    // Ticks of this frame including all its children
    long total;
    // Ticks of the samples that end in this frame
    long self;

    // Returns the child with the given title, creating it if necessary
    Frame child(String title) {
        Frame existing = get(title);
        if (existing != null) {
            return existing;
        }
        Frame created = new Frame();
        put(title, created);
        return created;
    }
}
private static final String HEADER = "<!DOCTYPE html>\n" +
"<html lang='en'>\n" +
"<head>\n" +
"<meta charset='utf-8'>\n" +
"<style>\n" +
"\tbody {margin: 0; padding: 10px; background-color: #ffffff}\n" +
"\th1 {margin: 5px 0 0 0; font-size: 18px; font-weight: normal; text-align: center}\n" +
"\theader {margin: -24px 0 5px 0; line-height: 24px}\n" +
"\tbutton {font: 12px sans-serif; cursor: pointer}\n" +
"\tp {margin: 5px 0 5px 0}\n" +
"\ta {color: #0366d6}\n" +
"\t#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: #ffffe0; outline: 1px solid #ffc000; height: 15px}\n" +
"\t#hl span {padding: 0 3px 0 3px}\n" +
"\t#status {overflow: hidden; white-space: nowrap}\n" +
"\t#match {overflow: hidden; white-space: nowrap; display: none; float: right; text-align: right}\n" +
"\t#reset {cursor: pointer}\n" +
"</style>\n" +
"</head>\n" +
"<body style='font: 12px Verdana, sans-serif'>\n" +
"<h1>${title}</h1>\n" +
"<header style='text-align: left'><button id='reverse' title='Reverse'>&#x1f53b;</button>&nbsp;&nbsp;<button id='search' title='Search'>&#x1f50d;</button></header>\n" +
"<header style='text-align: right'>Produced by <a href='https://github.com/jvm-profiling-tools/async-profiler'>async-profiler</a></header>\n" +
"<canvas id='canvas' style='width: 100%; height: ${height}px'></canvas>\n" +
"<div id='hl'><span></span></div>\n" +
"<p id='match'>Matched: <span id='matchval'></span> <span id='reset' title='Clear'>&#x274c;</span></p>\n" +
"<p id='status'>&nbsp;</p>\n" +
"<script>\n" +
"\t// Copyright 2020 Andrei Pangin\n" +
"\t// Licensed under the Apache License, Version 2.0.\n" +
"\t'use strict';\n" +
"\tvar root, rootLevel, px, pattern;\n" +
"\tvar reverse = ${reverse};\n" +
"\tconst levels = Array(${depth});\n" +
"\tfor (let h = 0; h < levels.length; h++) {\n" +
"\t\tlevels[h] = [];\n" +
"\t}\n" +
"\n" +
"\tconst canvas = document.getElementById('canvas');\n" +
"\tconst c = canvas.getContext('2d');\n" +
"\tconst hl = document.getElementById('hl');\n" +
"\tconst status = document.getElementById('status');\n" +
"\n" +
"\tconst canvasWidth = canvas.offsetWidth;\n" +
"\tconst canvasHeight = canvas.offsetHeight;\n" +
"\tcanvas.style.width = canvasWidth + 'px';\n" +
"\tcanvas.width = canvasWidth * (devicePixelRatio || 1);\n" +
"\tcanvas.height = canvasHeight * (devicePixelRatio || 1);\n" +
"\tif (devicePixelRatio) c.scale(devicePixelRatio, devicePixelRatio);\n" +
"\tc.font = document.body.style.font;\n" +
"\n" +
"\tconst palette = [\n" +
"\t\t[0x50e150, 30, 30, 30],\n" +
"\t\t[0x50bebe, 30, 30, 30],\n" +
"\t\t[0xe17d00, 30, 30, 0],\n" +
"\t\t[0xc8c83c, 30, 30, 10],\n" +
"\t\t[0xe15a5a, 30, 40, 40],\n" +
"\t];\n" +
"\n" +
"\tfunction getColor(p) {\n" +
"\t\tconst v = Math.random();\n" +
"\t\treturn '#' + (p[0] + ((p[1] * v) << 16 | (p[2] * v) << 8 | (p[3] * v))).toString(16);\n" +
"\t}\n" +
"\n" +
"\tfunction f(level, left, width, type, title) {\n" +
"\t\tlevels[level].push({left: left, width: width, color: getColor(palette[type]), title: title});\n" +
"\t}\n" +
"\n" +
"\tfunction samples(n) {\n" +
"\t\treturn n === 1 ? '1 sample' : n.toString().replace(/\\B(?=(\\d{3})+(?!\\d))/g, ',') + ' samples';\n" +
"\t}\n" +
"\n" +
"\tfunction pct(a, b) {\n" +
"\t\treturn a >= b ? '100' : (100 * a / b).toFixed(2);\n" +
"\t}\n" +
"\n" +
"\tfunction findFrame(frames, x) {\n" +
"\t\tlet left = 0;\n" +
"\t\tlet right = frames.length - 1;\n" +
"\n" +
"\t\twhile (left <= right) {\n" +
"\t\t\tconst mid = (left + right) >>> 1;\n" +
"\t\t\tconst f = frames[mid];\n" +
"\n" +
"\t\t\tif (f.left > x) {\n" +
"\t\t\t\tright = mid - 1;\n" +
"\t\t\t} else if (f.left + f.width <= x) {\n" +
"\t\t\t\tleft = mid + 1;\n" +
"\t\t\t} else {\n" +
"\t\t\t\treturn f;\n" +
"\t\t\t}\n" +
"\t\t}\n" +
"\n" +
"\t\tif (frames[left] && (frames[left].left - x) * px < 0.5) return frames[left];\n" +
"\t\tif (frames[right] && (x - (frames[right].left + frames[right].width)) * px < 0.5) return frames[right];\n" +
"\n" +
"\t\treturn null;\n" +
"\t}\n" +
"\n" +
"\tfunction search(r) {\n" +
"\t\tif (r && (r = prompt('Enter regexp to search:', '')) === null) {\n" +
"\t\t\treturn;\n" +
"\t\t}\n" +
"\n" +
"\t\tpattern = r ? RegExp(r) : undefined;\n" +
"\t\tconst matched = render(root, rootLevel);\n" +
"\t\tdocument.getElementById('matchval').textContent = pct(matched, root.width) + '%';\n" +
"\t\tdocument.getElementById('match').style.display = r ? 'inherit' : 'none';\n" +
"\t}\n" +
"\n" +
"\tfunction render(newRoot, newLevel) {\n" +
"\t\tif (root) {\n" +
"\t\t\tc.fillStyle = '#ffffff';\n" +
"\t\t\tc.fillRect(0, 0, canvasWidth, canvasHeight);\n" +
"\t\t}\n" +
"\n" +
"\t\troot = newRoot || levels[0][0];\n" +
"\t\trootLevel = newLevel || 0;\n" +
"\t\tpx = canvasWidth / root.width;\n" +
"\n" +
"\t\tconst x0 = root.left;\n" +
"\t\tconst x1 = x0 + root.width;\n" +
"\t\tconst marked = [];\n" +
"\n" +
"\t\tfunction mark(f) {\n" +
"\t\t\treturn marked[f.left] >= f.width || (marked[f.left] = f.width);\n" +
"\t\t}\n" +
"\n" +
"\t\tfunction totalMarked() {\n" +
"\t\t\tlet total = 0;\n" +
"\t\t\tlet left = 0;\n" +
"\t\t\tfor (let x in marked) {\n" +
"\t\t\t\tif (+x >= left) {\n" +
"\t\t\t\t\ttotal += marked[x];\n" +
"\t\t\t\t\tleft = +x + marked[x];\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\t\t\treturn total;\n" +
"\t\t}\n" +
"\n" +
"\t\tfunction drawFrame(f, y, alpha) {\n" +
"\t\t\tif (f.left < x1 && f.left + f.width > x0) {\n" +
"\t\t\t\tc.fillStyle = pattern && f.title.match(pattern) && mark(f) ? '#ee00ee' : f.color;\n" +
"\t\t\t\tc.fillRect((f.left - x0) * px, y, f.width * px, 15);\n" +
"\n" +
"\t\t\t\tif (f.width * px >= 21) {\n" +
"\t\t\t\t\tconst chars = Math.floor(f.width * px / 7);\n" +
"\t\t\t\t\tconst title = f.title.length <= chars ? f.title : f.title.substring(0, chars - 2) + '..';\n" +
"\t\t\t\t\tc.fillStyle = '#000000';\n" +
"\t\t\t\t\tc.fillText(title, Math.max(f.left - x0, 0) * px + 3, y + 12, f.width * px - 6);\n" +
"\t\t\t\t}\n" +
"\n" +
"\t\t\t\tif (alpha) {\n" +
"\t\t\t\t\tc.fillStyle = 'rgba(255, 255, 255, 0.5)';\n" +
"\t\t\t\t\tc.fillRect((f.left - x0) * px, y, f.width * px, 15);\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\t\t}\n" +
"\n" +
"\t\tfor (let h = 0; h < levels.length; h++) {\n" +
"\t\t\tconst y = reverse ? h * 16 : canvasHeight - (h + 1) * 16;\n" +
"\t\t\tconst frames = levels[h];\n" +
"\t\t\tfor (let i = 0; i < frames.length; i++) {\n" +
"\t\t\t\tdrawFrame(frames[i], y, h < rootLevel);\n" +
"\t\t\t}\n" +
"\t\t}\n" +
"\n" +
"\t\treturn totalMarked();\n" +
"\t}\n" +
"\n" +
"\tcanvas.onmousemove = function() {\n" +
"\t\tconst h = Math.floor((reverse ? event.offsetY : (canvasHeight - event.offsetY)) / 16);\n" +
"\t\tif (h >= 0 && h < levels.length) {\n" +
"\t\t\tconst f = findFrame(levels[h], event.offsetX / px + root.left);\n" +
"\t\t\tif (f) {\n" +
"\t\t\t\thl.style.left = (Math.max(f.left - root.left, 0) * px + canvas.offsetLeft) + 'px';\n" +
"\t\t\t\thl.style.width = (Math.min(f.width, root.width) * px) + 'px';\n" +
"\t\t\t\thl.style.top = ((reverse ? h * 16 : canvasHeight - (h + 1) * 16) + canvas.offsetTop) + 'px';\n" +
"\t\t\t\thl.firstChild.textContent = f.title;\n" +
"\t\t\t\thl.style.display = 'block';\n" +
"\t\t\t\tcanvas.title = f.title + '\\n(' + samples(f.width) + ', ' + pct(f.width, levels[0][0].width) + '%)';\n" +
"\t\t\t\tcanvas.style.cursor = 'pointer';\n" +
"\t\t\t\tcanvas.onclick = function() {\n" +
"\t\t\t\t\tif (f != root) {\n" +
"\t\t\t\t\t\trender(f, h);\n" +
"\t\t\t\t\t\tcanvas.onmousemove();\n" +
"\t\t\t\t\t}\n" +
"\t\t\t\t};\n" +
"\t\t\t\tstatus.textContent = 'Function: ' + canvas.title;\n" +
"\t\t\t\treturn;\n" +
"\t\t\t}\n" +
"\t\t}\n" +
"\t\tcanvas.onmouseout();\n" +
"\t}\n" +
"\n" +
"\tcanvas.onmouseout = function() {\n" +
"\t\thl.style.display = 'none';\n" +
"\t\tstatus.textContent = '\\xa0';\n" +
"\t\tcanvas.title = '';\n" +
"\t\tcanvas.style.cursor = '';\n" +
"\t\tcanvas.onclick = '';\n" +
"\t}\n" +
"\n" +
"\tdocument.getElementById('reverse').onclick = function() {\n" +
"\t\treverse = !reverse;\n" +
"\t\trender();\n" +
"\t}\n" +
"\n" +
"\tdocument.getElementById('search').onclick = function() {\n" +
"\t\tsearch(true);\n" +
"\t}\n" +
"\n" +
"\tdocument.getElementById('reset').onclick = function() {\n" +
"\t\tsearch(false);\n" +
"\t}\n" +
"\n" +
"\twindow.onkeydown = function() {\n" +
"\t\tif (event.ctrlKey && event.keyCode === 70) {\n" +
"\t\t\tevent.preventDefault();\n" +
"\t\t\tsearch(true);\n" +
"\t\t} else if (event.keyCode === 27) {\n" +
"\t\t\tsearch(false);\n" +
"\t\t}\n" +
"\t}\n";
private static final String FOOTER = "render();\n" +
"</script></body></html>\n";
}

View File

@@ -0,0 +1 @@
Main-Class: Main

31
src/converter/Main.java Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Main entry point of jar.
 * Prints a short usage message that lists available converters.
 */
public class Main {

    /** Usage message, one element per printed line. */
    private static final String[] USAGE = {
            "Usage: java -cp converter.jar <Converter> [options] <input> <output>",
            "",
            "Available converters:",
            " FlameGraph input.collapsed output.html",
            " jfr2flame input.jfr output.html",
            " jfr2nflx input.jfr output.nflx"
    };

    public static void main(String[] args) {
        for (String line : USAGE) {
            System.out.println(line);
        }
    }
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import one.jfr.ClassRef;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.MethodRef;
import one.jfr.StackTrace;
import java.nio.charset.StandardCharsets;
/**
 * Converts .jfr output produced by async-profiler to HTML Flame Graph.
 */
public class jfr2flame {

    /** Frame type value for which the {@code _[k]} suffix is appended to class-less methods. */
    private static final int FRAME_KERNEL = 5;

    private final JfrReader jfr;

    /** Cache of already built frame titles, keyed by method ID. */
    private final Dictionary<String> methodNames = new Dictionary<>();

    public jfr2flame(JfrReader jfr) {
        this.jfr = jfr;
    }

    /**
     * Feeds every stack trace of the recording to the given FlameGraph,
     * together with the number of samples counted for that stack trace.
     */
    public void convert(final FlameGraph fg) {
        // Don't use lambda for faster startup
        Dictionary.Visitor<StackTrace> visitor = new Dictionary.Visitor<StackTrace>() {
            @Override
            public void visit(long id, StackTrace stackTrace) {
                long[] methods = stackTrace.methods;
                byte[] types = stackTrace.types;
                int depth = methods.length;

                // The trace passed to FlameGraph is in the reverse order of the recorded frames
                String[] trace = new String[depth];
                for (int src = 0, dst = depth - 1; src < depth; src++, dst--) {
                    trace[dst] = getMethodName(methods[src], types[src]);
                }
                fg.addSample(trace, stackTrace.samples);
            }
        };
        jfr.stackTraces.forEach(visitor);
    }

    /**
     * Builds (or fetches from the cache) the frame title of a method:
     * {@code Class.method_[j]} when the class name is present, otherwise the bare
     * method name, suffixed with {@code _[k]} for frames of type FRAME_KERNEL.
     */
    private String getMethodName(long methodId, int type) {
        String cached = methodNames.get(methodId);
        if (cached != null) {
            return cached;
        }

        MethodRef method = jfr.methods.get(methodId);
        ClassRef cls = jfr.classes.get(method.cls);
        byte[] className = jfr.symbols.get(cls.name);
        byte[] methodName = jfr.symbols.get(method.name);

        String methodStr = new String(methodName, StandardCharsets.UTF_8);
        boolean hasClass = className != null && className.length != 0;

        String result;
        if (hasClass) {
            result = new String(className, StandardCharsets.UTF_8) + '.' + methodStr + "_[j]";
        } else if (type == FRAME_KERNEL) {
            result = methodStr + "_[k]";
        } else {
            result = methodStr;
        }

        methodNames.put(methodId, result);
        return result;
    }

    public static void main(String[] args) throws Exception {
        FlameGraph fg = new FlameGraph(args);
        if (fg.input == null) {
            String usage = "Usage: java " + jfr2flame.class.getName() + " [options] input.jfr [output.html]";
            System.out.println(usage);
            System.exit(1);
        }

        try (JfrReader reader = new JfrReader(fg.input)) {
            jfr2flame converter = new jfr2flame(reader);
            converter.convert(fg);
        }

        fg.dump();
    }
}

161
src/converter/jfr2nflx.java Normal file
View File

@@ -0,0 +1,161 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import one.jfr.ClassRef;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.MethodRef;
import one.jfr.Sample;
import one.jfr.StackTrace;
import one.proto.Proto;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
/**
 * Converts .jfr output produced by async-profiler to nflxprofile format
 * as described in https://github.com/Netflix/nflxprofile/blob/master/nflxprofile.proto.
 * The result nflxprofile can be opened and analyzed with FlameScope.
 */
public class jfr2nflx {

    /** Frame type names, indexed by the type byte stored with each stack frame. */
    private static final String[] FRAME_TYPE = {"jit", "jit", "inlined", "user", "user", "kernel"};

    /** Function name written for a stack trace that has no frames. */
    private static final byte[] NO_STACK = "[no_stack]".getBytes();

    private final JfrReader jfr;

    public jfr2nflx(JfrReader jfr) {
        this.jfr = jfr;
    }

    /**
     * Serializes the whole recording to the given stream
     * and prints the number of written bytes and the elapsed time.
     */
    public void dump(OutputStream out) throws IOException {
        long startTime = System.nanoTime();

        // Duration is the larger of the recorded duration and the time span covered by samples
        int sampleCount = jfr.samples.size();
        long durationTicks = 0;
        if (sampleCount != 0) {
            durationTicks = jfr.samples.get(sampleCount - 1).time - jfr.startTicks + 1;
        }
        double durationSec = Math.max(jfr.durationNanos / 1e9, durationTicks / (double) jfr.ticksPerSec);

        final Proto profile = new Proto(200000)
                .field(1, 0.0)
                .field(2, durationSec)
                .field(3, packSamples())
                .field(4, packDeltas())
                .field(6, "async-profiler")
                .field(8, new Proto(32).field(1, "has_node_stack").field(2, "true"))
                .field(8, new Proto(32).field(1, "has_samples_tid").field(2, "true"))
                .field(11, packTids());

        // Scratch buffers that are reset and reused for every stack trace
        final Proto nodes = new Proto(10000);
        final Proto node = new Proto(10000);

        // Don't use lambda for faster startup
        jfr.stackTraces.forEach(new Dictionary.Visitor<StackTrace>() {
            @Override
            public void visit(long id, StackTrace stackTrace) {
                profile.field(5, nodes
                        .field(1, (int) id)
                        .field(2, packNode(node, stackTrace)));
                nodes.reset();
                node.reset();
            }
        });

        out.write(profile.buffer(), 0, profile.size());

        long endTime = System.nanoTime();
        double elapsedSec = (endTime - startTime) / 1e9;
        System.out.println("Wrote " + profile.size() + " bytes in " + elapsedSec + " s");
    }

    /**
     * Writes one stack trace into {@code node}: the last recorded frame goes into
     * fields 1, 2 and 4, the remaining frames follow as repeated field 10
     * from the end of the array towards its beginning.
     *
     * @return the same {@code node} that was passed in
     */
    private Proto packNode(Proto node, StackTrace stackTrace) {
        long[] methods = stackTrace.methods;
        byte[] types = stackTrace.types;
        int top = methods.length - 1;

        if (top >= 0) {
            node.field(1, getMethodName(methods[top]));
            node.field(2, 1);
            node.field(4, FRAME_TYPE[types[top]]);
        } else {
            node.field(1, NO_STACK);
            node.field(2, 1);
            node.field(4, "user");
        }

        Proto frame = new Proto(100);
        for (int i = top - 1; i >= 0; i--) {
            node.field(10, frame
                    .field(1, getMethodName(methods[i]))
                    .field(2, FRAME_TYPE[types[i]]));
            frame.reset();
        }

        return node;
    }

    /** Packs stack trace IDs of all samples, in sample order. */
    private Proto packSamples() {
        Proto packed = new Proto(10000);
        for (Sample s : jfr.samples) {
            packed.writeInt(s.stackTraceId);
        }
        return packed;
    }

    /** Packs time differences between consecutive samples; the first one is relative to startTicks. */
    private Proto packDeltas() {
        Proto packed = new Proto(10000);
        double ticksPerSec = jfr.ticksPerSec;
        long previous = jfr.startTicks;
        for (Sample s : jfr.samples) {
            long current = s.time;
            packed.writeDouble((current - previous) / ticksPerSec);
            previous = current;
        }
        return packed;
    }

    /** Packs thread IDs of all samples, in sample order. */
    private Proto packTids() {
        Proto packed = new Proto(10000);
        for (Sample s : jfr.samples) {
            packed.writeInt(s.tid);
        }
        return packed;
    }

    /**
     * Returns the raw bytes of {@code class.method}, or of the method name alone
     * when the class name is missing or empty.
     */
    private byte[] getMethodName(long methodId) {
        MethodRef method = jfr.methods.get(methodId);
        ClassRef cls = jfr.classes.get(method.cls);
        byte[] className = jfr.symbols.get(cls.name);
        byte[] methodName = jfr.symbols.get(method.name);

        if (className == null || className.length == 0) {
            return methodName;
        }

        int dot = className.length;
        byte[] fullName = new byte[dot + 1 + methodName.length];
        System.arraycopy(className, 0, fullName, 0, dot);
        fullName[dot] = '.';
        System.arraycopy(methodName, 0, fullName, dot + 1, methodName.length);
        return fullName;
    }

    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Usage: java " + jfr2nflx.class.getName() + " input.jfr output.nflx");
            System.exit(1);
        }

        String input = args[0];
        File dst = new File(args[1]);
        if (dst.isDirectory()) {
            // Output is a directory: derive the file name from the input name
            String derivedName = new File(input).getName().replace(".jfr", ".nflx");
            dst = new File(dst, derivedName);
        }

        try (JfrReader reader = new JfrReader(input);
             FileOutputStream out = new FileOutputStream(dst)) {
            new jfr2nflx(reader).dump(out);
        }
    }
}

View File

@@ -0,0 +1,25 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * Reference to a class: holds only the numeric key of the class name.
 */
public class ClassRef {

    /** Key of the class name. */
    public final long name;

    public ClassRef(long name) {
        this.name = name;
    }
}

View File

@@ -0,0 +1,107 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * Fast and compact long->Object map.
 *
 * Open-addressing hash table with linear probing. Key 0 marks an empty slot
 * and therefore cannot be stored. The table length is always a power of two
 * and at most half of the slots are occupied, so probing always terminates.
 */
public class Dictionary<T> {
    private static final int INITIAL_CAPACITY = 16;

    private long[] keys;
    private Object[] values;
    /** Number of distinct keys stored in the table. */
    private int size;

    public Dictionary() {
        this.keys = new long[INITIAL_CAPACITY];
        this.values = new Object[INITIAL_CAPACITY];
    }

    /**
     * Associates the value with the key, replacing the previous value if the key is already present.
     *
     * @throws IllegalArgumentException if the key is 0
     */
    public void put(long key, T value) {
        if (key == 0) {
            throw new IllegalArgumentException("Zero key not allowed");
        }

        int mask = keys.length - 1;
        int i = hashCode(key) & mask;
        while (keys[i] != 0) {
            if (keys[i] == key) {
                // Overwriting an existing key must not change the size,
                // otherwise repeated updates would grow the table needlessly
                values[i] = value;
                return;
            }
            i = (i + 1) & mask;
        }
        keys[i] = key;
        values[i] = value;

        // Keep the table at most half full
        if (++size * 2 > keys.length) {
            resize(keys.length * 2);
        }
    }

    /**
     * Returns the value stored for the key, or null if there is none.
     */
    @SuppressWarnings("unchecked")
    public T get(long key) {
        int mask = keys.length - 1;
        int i = hashCode(key) & mask;
        while (keys[i] != key && keys[i] != 0) {
            i = (i + 1) & mask;
        }
        // Either the matching slot, or an empty slot whose value is null
        return (T) values[i];
    }

    /**
     * Calls the visitor for every stored key-value pair, in table order.
     */
    @SuppressWarnings("unchecked")
    public void forEach(Visitor<T> visitor) {
        for (int i = 0; i < keys.length; i++) {
            if (keys[i] != 0) {
                visitor.visit(keys[i], (T) values[i]);
            }
        }
    }

    /**
     * Makes sure that {@code count} more keys can be added without resizing.
     *
     * @return {@code count}, unchanged
     */
    public int preallocate(int count) {
        int newSize = size + count;
        if (newSize * 2 > keys.length) {
            // Largest power of two not exceeding newSize * 4 - 1; it is at least newSize * 2
            resize(Integer.highestOneBit(newSize * 4 - 1));
        }
        return count;
    }

    /** Rehashes all entries into new arrays of the given capacity (a power of two). */
    private void resize(int newCapacity) {
        long[] newKeys = new long[newCapacity];
        Object[] newValues = new Object[newCapacity];
        int mask = newKeys.length - 1;

        for (int i = 0; i < keys.length; i++) {
            if (keys[i] != 0) {
                for (int j = hashCode(keys[i]) & mask; ; j = (j + 1) & mask) {
                    if (newKeys[j] == 0) {
                        newKeys[j] = keys[i];
                        newValues[j] = values[i];
                        break;
                    }
                }
            }
        }

        keys = newKeys;
        values = newValues;
    }

    /** Folds the 64-bit key into 32 bits by XORing its halves. */
    private static int hashCode(long key) {
        return (int) (key ^ (key >>> 32));
    }

    public interface Visitor<T> {
        void visit(long key, T value);
    }
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * Base class of elements created while reading metadata.
 */
class Element {

    /**
     * Receives a child element. This base implementation ignores it.
     */
    void addChild(Element e) {
        // Nothing to do by default
    }
}

View File

@@ -0,0 +1,49 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Metadata element with a numeric ID, a name and a list of fields.
 */
class JfrClass extends Element {
    final int id;
    final String name;
    final List<JfrField> fields;

    /**
     * Reads the "id" (parsed as int) and "name" attributes; the field list starts empty.
     */
    JfrClass(Map<String, String> attributes) {
        String idAttr = attributes.get("id");
        this.id = Integer.parseInt(idAttr);
        this.name = attributes.get("name");
        this.fields = new ArrayList<>(2);
    }

    /** Collects children that are fields; all other children are ignored. */
    @Override
    void addChild(Element e) {
        if (!(e instanceof JfrField)) {
            return;
        }
        fields.add((JfrField) e);
    }

    /** Returns the first field with the given name, or null if there is none. */
    JfrField field(String name) {
        for (int i = 0, n = fields.size(); i < n; i++) {
            JfrField candidate = fields.get(i);
            if (candidate.name.equals(name)) {
                return candidate;
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
import java.util.Map;
/**
 * Metadata element that describes one field.
 */
class JfrField extends Element {
    /** Value of the "name" attribute. */
    final String name;
    /** Value of the "class" attribute, parsed as int. */
    final int type;
    /** True only when the "constantPool" attribute equals "true". */
    final boolean constantPool;

    JfrField(Map<String, String> attributes) {
        String typeAttr = attributes.get("class");
        String constantPoolAttr = attributes.get("constantPool");

        this.name = attributes.get("name");
        this.type = Integer.parseInt(typeAttr);
        this.constantPool = "true".equals(constantPoolAttr);
    }
}

View File

@@ -0,0 +1,384 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Parses JFR output produced by async-profiler.
 *
 * The whole file is mapped into memory and parsed in the constructor:
 * first metadata, then constant pools, then events. The results are exposed
 * through the public fields of this class.
 */
public class JfrReader implements Closeable {
    /** File offset at which event parsing starts. */
    private static final int CHUNK_HEADER_SIZE = 68;
    /** Header position of the 8-byte constant pool offset; only its last 4 bytes are read. */
    private static final int CPOOL_OFFSET = 16;
    /** Header position of the 8-byte metadata offset; only its last 4 bytes are read. */
    private static final int META_OFFSET = 24;

    private final FileChannel ch;
    private final ByteBuffer buf;

    public final long startNanos;
    public final long durationNanos;
    public final long startTicks;
    public final long ticksPerSec;

    /** Classes declared in metadata without a "superType" attribute, by ID. */
    public final Dictionary<JfrClass> types = new Dictionary<>();
    /** All classes declared in metadata, by name. */
    public final Map<String, JfrClass> typesByName = new HashMap<>();
    public final Dictionary<String> threads = new Dictionary<>();
    public final Dictionary<ClassRef> classes = new Dictionary<>();
    public final Dictionary<byte[]> symbols = new Dictionary<>();
    public final Dictionary<MethodRef> methods = new Dictionary<>();
    public final Dictionary<StackTrace> stackTraces = new Dictionary<>();
    public final Map<Integer, String> frameTypes = new HashMap<>();
    public final Map<Integer, String> threadStates = new HashMap<>();
    /** Execution samples, sorted by time. */
    public final List<Sample> samples = new ArrayList<>();

    /**
     * Opens and fully parses the given file.
     *
     * @throws IOException if the file cannot be opened or mapped, does not start with
     *                     the expected magic number, or has a major version other than 2
     */
    public JfrReader(String fileName) throws IOException {
        this.ch = FileChannel.open(Paths.get(fileName), StandardOpenOption.READ);
        try {
            this.buf = ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size());

            // Magic number: bytes 'F', 'L', 'R', 0
            if (buf.getInt(0) != 0x464c5200) {
                throw new IOException("Not a valid JFR file");
            }

            // Major version is in the upper 16 bits, minor version in the lower 16 bits
            int version = buf.getInt(4);
            if (version < 0x20000 || version > 0x2ffff) {
                throw new IOException("Unsupported JFR version: " + (version >>> 16) + "." + (version & 0xffff));
            }

            this.startNanos = buf.getLong(32);
            this.durationNanos = buf.getLong(40);
            this.startTicks = buf.getLong(48);
            this.ticksPerSec = buf.getLong(56);

            readMeta();
            readConstantPool();
            readEvents();
        } catch (Throwable t) {
            // The caller never receives the object when the constructor fails,
            // so nobody else can close the channel: release it here
            try {
                ch.close();
            } catch (IOException closeFailure) {
                t.addSuppressed(closeFailure);
            }
            throw t;
        }
    }

    @Override
    public void close() throws IOException {
        ch.close();
    }

    /**
     * Reads the metadata: a string table followed by a tree of elements
     * whose names and attributes are indexes into that table.
     */
    private void readMeta() {
        buf.position(buf.getInt(META_OFFSET + 4));

        // Leading values of the metadata event that are not used by this reader
        getVarint();
        getVarint();
        getVarlong();
        getVarlong();
        getVarlong();

        String[] strings = new String[getVarint()];
        for (int i = 0; i < strings.length; i++) {
            strings[i] = getString();
        }
        readElement(strings);
    }

    /** Recursively reads one element with its attributes and children. */
    private Element readElement(String[] strings) {
        String name = strings[getVarint()];

        int attributeCount = getVarint();
        Map<String, String> attributes = new HashMap<>(attributeCount);
        for (int i = 0; i < attributeCount; i++) {
            attributes.put(strings[getVarint()], strings[getVarint()]);
        }

        Element e = createElement(name, attributes);

        int childCount = getVarint();
        for (int i = 0; i < childCount; i++) {
            e.addChild(readElement(strings));
        }
        return e;
    }

    /**
     * Creates an element for the given tag. "class" elements are also registered
     * in {@link #typesByName} and, unless they have a "superType" attribute, in {@link #types}.
     */
    private Element createElement(String name, Map<String, String> attributes) {
        switch (name) {
            case "class": {
                JfrClass type = new JfrClass(attributes);
                if (!attributes.containsKey("superType")) {
                    types.put(type.id, type);
                }
                typesByName.put(type.name, type);
                return type;
            }
            case "field":
                return new JfrField(attributes);
            default:
                return new Element();
        }
    }

    /**
     * Reads the chain of constant pools. Each pool stores a delta to the next one;
     * a zero delta ends the chain.
     */
    private void readConstantPool() {
        int offset = buf.getInt(CPOOL_OFFSET + 4);

        while (true) {
            buf.position(offset);

            // Leading values that are not used by this reader
            getVarint();
            getVarint();
            getVarlong();
            getVarlong();
            long delta = getVarlong();
            getVarint();

            int poolCount = getVarint();
            for (int i = 0; i < poolCount; i++) {
                int type = getVarint();
                readConstants(types.get(type));
            }

            if (delta == 0) {
                break;
            }
            offset += delta;
        }
    }

    /** Reads all constants of one type, dispatching on the type name. */
    private void readConstants(JfrClass type) {
        switch (type.name) {
            case "jdk.types.ChunkHeader":
                // NOTE(review): the 3 extra bytes presumably precede the embedded header copy - confirm
                buf.position(buf.position() + (CHUNK_HEADER_SIZE + 3));
                break;
            case "java.lang.Thread":
                readThreads(type.field("group") != null);
                break;
            case "java.lang.Class":
                readClasses(type.field("hidden") != null);
                break;
            case "jdk.types.Symbol":
                readSymbols();
                break;
            case "jdk.types.Method":
                readMethods();
                break;
            case "jdk.types.StackTrace":
                readStackTraces();
                break;
            case "jdk.types.FrameType":
                readMap(frameTypes);
                break;
            case "jdk.types.ThreadState":
                readMap(threadStates);
                break;
            default:
                readOtherConstants(type.fields);
        }
    }

    /** Reads threads; the Java name is preferred, the OS name is used when the Java name is null. */
    private void readThreads(boolean hasGroup) {
        int count = threads.preallocate(getVarint());
        for (int i = 0; i < count; i++) {
            long id = getVarlong();
            String osName = getString();
            int osThreadId = getVarint();
            String javaName = getString();
            long javaThreadId = getVarlong();
            if (hasGroup) getVarlong();
            threads.put(id, javaName != null ? javaName : osName);
        }
    }

    /** Reads classes; only the name reference is kept. */
    private void readClasses(boolean hasHidden) {
        int count = classes.preallocate(getVarint());
        for (int i = 0; i < count; i++) {
            long id = getVarlong();
            long loader = getVarlong();
            long name = getVarlong();
            long pkg = getVarlong();
            int modifiers = getVarint();
            if (hasHidden) getVarint();
            classes.put(id, new ClassRef(name));
        }
    }

    /** Reads methods; class, name and signature references are kept. */
    private void readMethods() {
        int count = methods.preallocate(getVarint());
        for (int i = 0; i < count; i++) {
            long id = getVarlong();
            long cls = getVarlong();
            long name = getVarlong();
            long sig = getVarlong();
            int modifiers = getVarint();
            int hidden = getVarint();
            methods.put(id, new MethodRef(cls, name, sig));
        }
    }

    private void readStackTraces() {
        int count = stackTraces.preallocate(getVarint());
        for (int i = 0; i < count; i++) {
            long id = getVarlong();
            int truncated = getVarint();
            StackTrace stackTrace = readStackTrace();
            stackTraces.put(id, stackTrace);
        }
    }

    /** Reads the frames of one stack trace; line and bci of each frame are read and discarded. */
    private StackTrace readStackTrace() {
        int depth = getVarint();
        long[] methods = new long[depth];
        byte[] types = new byte[depth];
        for (int i = 0; i < depth; i++) {
            methods[i] = getVarlong();
            int line = getVarint();
            int bci = getVarint();
            types[i] = buf.get();
        }
        return new StackTrace(methods, types);
    }

    /** Reads symbols as raw bytes; only string encoding 3 is accepted. */
    private void readSymbols() {
        int count = symbols.preallocate(getVarint());
        for (int i = 0; i < count; i++) {
            long id = getVarlong();
            if (buf.get() != 3) {
                throw new IllegalArgumentException("Invalid symbol encoding");
            }
            symbols.put(id, getBytes());
        }
    }

    /** Reads a list of (int key, string value) pairs into the given map. */
    private void readMap(Map<Integer, String> map) {
        int count = getVarint();
        for (int i = 0; i < count; i++) {
            map.put(getVarint(), getString());
        }
    }

    /**
     * Skips constants of a type this reader does not interpret. A field is read as a string
     * only if it is an inline (non constant pool) java.lang.String; everything else
     * is read as a variable-length number.
     */
    private void readOtherConstants(List<JfrField> fields) {
        int stringType = getTypeId("java.lang.String");

        boolean[] numeric = new boolean[fields.size()];
        for (int i = 0; i < numeric.length; i++) {
            JfrField f = fields.get(i);
            numeric[i] = f.constantPool || f.type != stringType;
        }

        int count = getVarint();
        for (int i = 0; i < count; i++) {
            getVarlong();
            readFields(numeric);
        }
    }

    private void readFields(boolean[] numeric) {
        for (boolean n : numeric) {
            if (n) {
                getVarlong();
            } else {
                getString();
            }
        }
    }

    /**
     * Scans the file from CHUNK_HEADER_SIZE to its end. jdk.ExecutionSample and
     * jdk.NativeMethodSample events are collected; all other events are skipped
     * using their size. Collected samples are finally sorted.
     */
    private void readEvents() {
        int executionSample = getTypeId("jdk.ExecutionSample");
        int nativeMethodSample = getTypeId("jdk.NativeMethodSample");

        buf.position(CHUNK_HEADER_SIZE);
        while (buf.hasRemaining()) {
            int position = buf.position();
            int size = getVarint();
            int type = getVarint();
            if (type == executionSample || type == nativeMethodSample) {
                readExecutionSample();
            } else {
                buf.position(position + size);
            }
        }

        Collections.sort(samples);
    }

    /** Reads one sample and increments the sample counter of its stack trace, if the trace is known. */
    private void readExecutionSample() {
        long time = getVarlong();
        int tid = getVarint();
        int stackTraceId = getVarint();
        int threadState = getVarint();
        samples.add(new Sample(time, tid, stackTraceId, threadState));

        StackTrace stackTrace = stackTraces.get(stackTraceId);
        if (stackTrace != null) {
            stackTrace.samples++;
        }
    }

    /** Returns the ID of the named type, or -1 if metadata does not declare it. */
    private int getTypeId(String typeName) {
        JfrClass type = typesByName.get(typeName);
        return type != null ? type.id : -1;
    }

    /**
     * Reads a variable-length int: 7 bits per byte, least significant group first;
     * a byte with the high bit clear ends the number.
     */
    private int getVarint() {
        int result = 0;
        for (int shift = 0; ; shift += 7) {
            byte b = buf.get();
            result |= (b & 0x7f) << shift;
            if (b >= 0) {
                return result;
            }
        }
    }

    /**
     * Reads a variable-length long. Up to eight bytes carry 7 bits each;
     * if all of them have the high bit set, a ninth byte supplies the top 8 bits.
     */
    private long getVarlong() {
        long result = 0;
        for (int shift = 0; shift < 56; shift += 7) {
            byte b = buf.get();
            result |= (b & 0x7fL) << shift;
            if (b >= 0) {
                return result;
            }
        }
        return result | (buf.get() & 0xffL) << 56;
    }

    /**
     * Reads a string. The first byte selects the encoding: 0 - null, 1 - empty string,
     * 3 - UTF-8 bytes, 4 - array of chars stored as variable-length ints, 5 - ISO-8859-1 bytes.
     */
    private String getString() {
        switch (buf.get()) {
            case 0:
                return null;
            case 1:
                return "";
            case 3:
                return new String(getBytes(), StandardCharsets.UTF_8);
            case 4: {
                char[] chars = new char[getVarint()];
                for (int i = 0; i < chars.length; i++) {
                    chars[i] = (char) getVarint();
                }
                return new String(chars);
            }
            case 5:
                return new String(getBytes(), StandardCharsets.ISO_8859_1);
            default:
                throw new IllegalArgumentException("Invalid string encoding");
        }
    }

    /** Reads a length-prefixed byte array. */
    private byte[] getBytes() {
        byte[] bytes = new byte[getVarint()];
        buf.get(bytes);
        return bytes;
    }
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * Immutable triple of numeric references describing a method:
 * its class, its name and its signature.
 */
public class MethodRef {
    /** Reference to the declaring class. */
    public final long cls;
    /** Reference to the method name. */
    public final long name;
    /** Reference to the method signature. */
    public final long sig;

    public MethodRef(long cls, long name, long sig) {
        this.sig = sig;
        this.name = name;
        this.cls = cls;
    }
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * One recorded sample. Samples order by {@link #time}, ascending.
 */
public class Sample implements Comparable<Sample> {
    public final long time;
    public final int tid;
    public final int stackTraceId;
    public final int threadState;

    public Sample(long time, int tid, int stackTraceId, int threadState) {
        this.threadState = threadState;
        this.stackTraceId = stackTraceId;
        this.tid = tid;
        this.time = time;
    }

    /** Compares by time only; the other fields do not take part in ordering. */
    @Override
    public int compareTo(Sample o) {
        if (time < o.time) {
            return -1;
        }
        return time == o.time ? 0 : 1;
    }
}

View File

@@ -0,0 +1,28 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.jfr;
/**
 * A stack trace held as two arrays, {@link #methods} and {@link #types},
 * plus a mutable counter of the samples attributed to it.
 */
public class StackTrace {
    public final long[] methods;
    public final byte[] types;
    /** Number of samples counted for this trace; starts at zero. */
    public long samples;

    public StackTrace(long[] methods, byte[] types) {
        this.types = types;
        this.methods = methods;
    }
}

View File

@@ -0,0 +1,127 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.proto;
import java.util.Arrays;
/**
 * Simplified implementation of a Protobuf writer, capable of encoding
 * varints, doubles, ASCII strings and embedded messages.
 *
 * <p>The backing array grows on demand. {@link #buffer()} exposes the array
 * itself (not a copy); only its first {@link #size()} bytes are valid.
 */
public class Proto {
    private byte[] buf;
    private int pos;

    /**
     * @param capacity initial size of the backing array, in bytes
     */
    public Proto(int capacity) {
        this.buf = new byte[capacity];
    }

    /** Returns the backing array; it may be replaced by later writes that grow it. */
    public byte[] buffer() {
        return buf;
    }

    /** Returns the number of bytes written so far. */
    public int size() {
        return pos;
    }

    /** Discards the written content while keeping the allocated array. */
    public void reset() {
        pos = 0;
    }

    /** Writes a varint field (wire type 0). */
    public Proto field(int index, int n) {
        tag(index, 0);
        writeInt(n);
        return this;
    }

    /** Writes a 64-bit field (wire type 1) holding the raw bits of {@code d}. */
    public Proto field(int index, double d) {
        tag(index, 1);
        writeDouble(d);
        return this;
    }

    /** Writes a length-delimited field (wire type 2) with the characters of {@code s}. */
    public Proto field(int index, String s) {
        tag(index, 2);
        writeString(s);
        return this;
    }

    /** Writes a length-delimited field (wire type 2) with the given bytes. */
    public Proto field(int index, byte[] bytes) {
        tag(index, 2);
        writeBytes(bytes, 0, bytes.length);
        return this;
    }

    /** Writes the current content of {@code proto} as an embedded message (wire type 2). */
    public Proto field(int index, Proto proto) {
        tag(index, 2);
        writeBytes(proto.buf, 0, proto.pos);
        return this;
    }

    /**
     * Writes {@code n} as a varint, treating it as an unsigned 32-bit value,
     * so a negative argument takes five bytes.
     */
    public void writeInt(int n) {
        // ceil(significantBits / 7); numberOfLeadingZeros is 0 for negatives, giving 5
        int length = n == 0 ? 1 : (38 - Integer.numberOfLeadingZeros(n)) / 7;
        ensureCapacity(length);

        // Test the unsigned remainder: a signed "n > 0x7f" is false for negative n
        // and would emit a single byte with the continuation bit set.
        while ((n >>> 7) != 0) {
            buf[pos++] = (byte) (0x80 | (n & 0x7f));
            n >>>= 7;
        }
        buf[pos++] = (byte) n;
    }

    /** Writes the 8 bytes of {@code d}'s raw long bits, least significant byte first. */
    public void writeDouble(double d) {
        ensureCapacity(8);
        long n = Double.doubleToRawLongBits(d);
        for (int i = 0; i < 8; i++) {
            buf[pos + i] = (byte) (n >>> (8 * i));
        }
        pos += 8;
    }

    /**
     * Writes the length of {@code s} followed by one byte per character.
     * Each char is narrowed to a byte, so only ASCII text round-trips.
     */
    public void writeString(String s) {
        int length = s.length();
        writeInt(length);
        ensureCapacity(length);
        for (int i = 0; i < length; i++) {
            buf[pos++] = (byte) s.charAt(i);
        }
    }

    /** Writes {@code length} as a varint followed by that many bytes from {@code bytes}. */
    public void writeBytes(byte[] bytes, int offset, int length) {
        writeInt(length);
        ensureCapacity(length);
        System.arraycopy(bytes, offset, buf, pos, length);
        pos += length;
    }

    /**
     * Writes the field key {@code (index << 3) | type} as a varint.
     * Keys for field numbers below 16 still occupy a single byte; larger
     * field numbers need more than one byte and are no longer truncated.
     */
    private void tag(int index, int type) {
        writeInt(index << 3 | type);
    }

    /** Grows the backing array (at least doubling it) so that {@code length} more bytes fit. */
    private void ensureCapacity(int length) {
        if (pos + length > buf.length) {
            buf = Arrays.copyOf(buf, Math.max(pos + length, buf.length * 2));
        }
    }
}

126
src/dictionary.cpp Normal file
View File

@@ -0,0 +1,126 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <string.h>
#include "dictionary.h"
#include "arch.h"
// Returns a malloc'ed, NUL-terminated copy of the first `length` bytes of `key`.
// The caller owns the result and releases it with free().
static inline char* allocateKey(const char* key, size_t length) {
    char* copy = static_cast<char*>(malloc(length + 1));
    memcpy(copy, key, length);
    copy[length] = '\0';
    return copy;
}
// True when the NUL-terminated `candidate` is exactly the first `length`
// bytes of `key`: same content and the same length.
static inline bool keyEquals(const char* candidate, const char* key, size_t length) {
    if (strncmp(candidate, key, length) != 0) {
        return false;
    }
    return candidate[length] == 0;
}
// Allocates the zero-filled root table; indices start at 1.
Dictionary::Dictionary() {
    DictTable* root = (DictTable*)calloc(1, sizeof(DictTable));
    _base_index = 1;
    root->base_index = 1;
    _table = root;
}
// Releases all keys and nested tables, then the root table itself.
Dictionary::~Dictionary() {
    DictTable* root = _table;
    clear(root);
    free(root);
}
void Dictionary::clear() {
clear(_table);
memset(_table, 0, sizeof(DictTable));
_table->base_index = _base_index = 1;
}
// Recursively frees the keys stored in `table` and every table chained from
// its rows. `table` itself is left allocated and its pointers are not reset.
void Dictionary::clear(DictTable* table) {
    DictRow* const end = table->rows + ROWS;
    for (DictRow* row = table->rows; row != end; ++row) {
        for (int cell = 0; cell < CELLS; cell++) {
            free(row->keys[cell]);
        }

        DictTable* child = row->next;
        if (child == NULL) {
            continue;
        }
        clear(child);
        free(child);
    }
}
// Many popular symbols are quite short, e.g. "[B", "()V" etc.
// FNV-1a is reasonably fast and sufficiently random.
// Hashes exactly `length` bytes of `key`; no terminator is required.
unsigned int Dictionary::hash(const char* key, size_t length) {
    unsigned int h = 2166136261U;
    const char* const end = key + length;
    for (const char* p = key; p != end; ++p) {
        h ^= *p;
        h *= 16777619;
    }
    return h;
}
// Convenience overload for NUL-terminated keys.
unsigned int Dictionary::lookup(const char* key) {
    size_t length = strlen(key);
    return lookup(key, length);
}
// Returns the index of the key formed by the first `length` bytes of `key`,
// inserting the key first if it is not present.
// Starting at the root table, the row is selected by the hash; its cells are
// scanned in order. When every cell of the row holds a different key, the
// search continues in the table chained from that row, which is created on
// demand. Cells and chained tables are published with compare-and-swap.
unsigned int Dictionary::lookup(const char* key, size_t length) {
DictTable* table = _table;
unsigned int h = hash(key, length);
while (true) {
DictRow* row = &table->rows[h % ROWS];
for (int c = 0; c < CELLS; c++) {
if (row->keys[c] == NULL) {
// Empty cell: try to claim it with a private copy of the key
char* new_key = allocateKey(key, length);
if (__sync_bool_compare_and_swap(&row->keys[c], NULL, new_key)) {
return table->index(h % ROWS, c);
}
// The CAS failed, so the cell is no longer NULL; drop the copy and
// fall through to compare against whatever key is stored there now
free(new_key);
}
if (keyEquals(row->keys[c], key, length)) {
return table->index(h % ROWS, c);
}
}
// All cells of this row are taken by other keys: descend to the next level
if (row->next == NULL) {
DictTable* new_table = (DictTable*)calloc(1, sizeof(DictTable));
// Reserve a fresh range of TABLE_CAPACITY indices for the new table
new_table->base_index = __sync_add_and_fetch(&_base_index, TABLE_CAPACITY);
if (!__sync_bool_compare_and_swap(&row->next, NULL, new_table)) {
// row->next was set in the meantime; use that table instead
free(new_table);
}
}
table = row->next;
// Rotate the hash right by ROW_BITS so the next level selects its row from different bits
h = (h >> ROW_BITS) | (h << (32 - ROW_BITS));
}
}
void Dictionary::collect(std::map<unsigned int, const char*>& map) {
collect(map, _table);
}
// Recursive worker: records every non-empty cell of `table` under its index,
// then visits the tables chained from each row. The stored pointers refer to
// the dictionary's own key strings; nothing is copied.
void Dictionary::collect(std::map<unsigned int, const char*>& map, DictTable* table) {
    for (int r = 0; r < ROWS; r++) {
        DictRow& row = table->rows[r];
        for (int c = 0; c < CELLS; c++) {
            char* stored = row.keys[c];
            if (stored == NULL) {
                continue;
            }
            map[table->index(r, c)] = stored;
        }

        if (row.next != NULL) {
            collect(map, row.next);
        }
    }
}

70
src/dictionary.h Normal file
View File

@@ -0,0 +1,70 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _DICTIONARY_H
#define _DICTIONARY_H
#include <map>
#include <stddef.h>
// Number of hash bits that select a row; also the rotation step between levels
#define ROW_BITS 7
// Rows per table (128)
#define ROWS (1 << ROW_BITS)
// Keys stored directly in one row before chaining to another table
#define CELLS 3
// Keys one table can hold; also the size of the index range reserved per table
#define TABLE_CAPACITY (ROWS * CELLS)
struct DictTable;
// One hash bucket: up to CELLS heap-allocated keys (NULL = empty cell) and an
// optional overflow table used when all cells hold other keys.
struct DictRow {
char* keys[CELLS];
DictTable* next;
};
// One level of the dictionary: ROWS buckets plus the first index of the
// range this table hands out.
struct DictTable {
DictRow rows[ROWS];
unsigned int base_index;
// Index of the cell at (row, col): cells are numbered column-major within the table's range
unsigned int index(int row, int col) {
return base_index + (col << ROW_BITS) + row;
}
};
// Append-only concurrent hash table based on multi-level arrays.
// Maps byte strings to unsigned indices; a key keeps its index until clear().
class Dictionary {
private:
// Root table, allocated in the constructor
DictTable* _table;
// Base index of the most recently reserved table range; updated atomically in lookup()
volatile unsigned int _base_index;
// Frees keys and chained tables below `table` (not `table` itself)
static void clear(DictTable* table);
static unsigned int hash(const char* key, size_t length);
static void collect(std::map<unsigned int, const char*>& map, DictTable* table);
public:
Dictionary();
~Dictionary();
// Removes all keys and restarts index numbering
void clear();
// Returns the index of `key` (NUL-terminated), inserting it if absent
unsigned int lookup(const char* key);
// Same, for a key given as pointer + length
unsigned int lookup(const char* key, size_t length);
// Fills `map` with index -> key for every stored key
void collect(std::map<unsigned int, const char*>& map);
};
#endif // _DICTIONARY_H

74
src/engine.cpp Normal file
View File

@@ -0,0 +1,74 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "engine.h"
#include "stackFrame.h"
// Storage for the static flag declared in Engine; set through Engine::enableEvents()
volatile bool Engine::_enabled;
// Default implementation: accepts any arguments. Virtual, so engines may override it.
Error Engine::check(Arguments& args) {
return Error::OK;
}
// Default native stack walking mode: CSTACK_FP
CStack Engine::cstack() {
return CSTACK_FP;
}
// Collects native return addresses by following the chain of saved frame pointers.
// ucontext      - signal context to start from, or NULL to start from the caller of this function
// tid           - not used by this implementation
// callchain     - output array receiving up to max_depth program counters
// java_methods, runtime_stubs - walking stops at the first PC contained in either
// Returns the number of entries written to callchain.
int Engine::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
CodeCache* java_methods, CodeCache* runtime_stubs) {
const void* pc;
uintptr_t fp;
// Address of a local variable serves as the lower bound for the first frame pointer
uintptr_t prev_fp = (uintptr_t)&fp;
// Never follow frame pointers more than 1 MB (0x100000) above that address
uintptr_t bottom = prev_fp + 0x100000;
if (ucontext == NULL) {
pc = __builtin_return_address(0);
fp = (uintptr_t)__builtin_frame_address(1);
} else {
StackFrame frame(ucontext);
pc = (const void*)frame.pc();
fp = frame.fp();
}
int depth = 0;
// Program counters below this address are treated as invalid
const void* const valid_pc = (const void* const)0x1000;
// Walk until the bottom of the stack or until the first Java frame
while (depth < max_depth && pc >= valid_pc) {
if (java_methods->contains(pc) || runtime_stubs->contains(pc)) {
break;
}
callchain[depth++] = pc;
// Check if the next frame is below on the current stack:
// fp must be above the previous one, within 0x40000 bytes of it, and under the 1 MB limit
if (fp <= prev_fp || fp >= prev_fp + 0x40000 || fp >= bottom) {
break;
}
// Frame pointer must be word aligned
if ((fp & (sizeof(uintptr_t) - 1)) != 0) {
break;
}
prev_fp = fp;
// The word after the saved frame pointer is read as the return address
pc = ((const void**)fp)[1];
fp = ((uintptr_t*)fp)[0];
}
return depth;
}

38
src/engine.h Executable file → Normal file
View File

@@ -18,16 +18,50 @@
#define _ENGINE_H
#include "arguments.h"
#include "codeCache.h"
// Base class of profiling engines. name(), units(), start() and stop() are
// pure virtual; check(), cstack() and getNativeTrace() have default bodies.
class Engine {
protected:
// Shared by all engines (static); written by enableEvents()
static volatile bool _enabled;
public:
virtual const char* name() = 0;
virtual const char* units() = 0;
virtual Error start(const char* event, long interval) = 0;
virtual Error check(Arguments& args);
virtual Error start(Arguments& args) = 0;
virtual void stop() = 0;
virtual ~Engine() {}
virtual CStack cstack();
virtual int getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
CodeCache* java_methods, CodeCache* runtime_stubs);
// Sets the static _enabled flag
void enableEvents(bool enabled) {
_enabled = enabled;
}
};
// Engine that does nothing: start() always succeeds, stop() is empty,
// and native stack collection is reported as disabled (CSTACK_NO).
class NoopEngine : public Engine {
public:
const char* name() {
return "noop";
}
const char* units() {
return "ns";
}
Error start(Arguments& args) {
return Error::OK;
}
void stop() {
}
CStack cstack() {
return CSTACK_NO;
}
};
#endif // _ENGINE_H

55
src/event.h Normal file
View File

@@ -0,0 +1,55 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _EVENT_H
#define _EVENT_H
#include <stdint.h>
#include "os.h"
// Common base of the event payloads below. It declares no fields of its own.
class Event {
public:
// Reinterprets the first 4 bytes of the object as a u32.
// NOTE(review): presumably this is the first field of the concrete subclass - confirm
u32 id() {
return *(u32*)this;
}
};
// Payload of an execution sample; the thread state defaults to THREAD_RUNNING.
class ExecutionEvent : public Event {
public:
ThreadState _thread_state;
ExecutionEvent() : _thread_state(THREAD_RUNNING) {
}
};
// Payload of an allocation event. Fields are not initialized by this class.
class AllocEvent : public Event {
public:
u32 _class_id;
u64 _total_size;
u64 _instance_size;
};
// Payload of a lock event. Fields are not initialized by this class.
class LockEvent : public Event {
public:
u32 _class_id;
u64 _start_time;
u64 _end_time;
uintptr_t _address;
long long _timeout;
};
#endif // _EVENT_H

File diff suppressed because it is too large Load Diff

View File

@@ -21,15 +21,18 @@
#include <string>
#include <iostream>
#include "arch.h"
#include "arguments.h"
class Trie {
private:
public:
std::map<std::string, Trie> _children;
u64 _total;
u64 _self;
public:
Trie() : _children(), _total(0), _self(0) {
}
Trie* addChild(const std::string& key, u64 value) {
_total += value;
return &_children[key];
@@ -40,7 +43,18 @@ class Trie {
_self += value;
}
friend class FlameGraph;
int depth(u64 cutoff) const {
if (_total < cutoff) {
return 0;
}
int max_depth = 0;
for (std::map<std::string, Trie>::const_iterator it = _children.begin(); it != _children.end(); ++it) {
int d = it->second.depth(cutoff);
if (d > max_depth) max_depth = d;
}
return max_depth + 1;
}
};
@@ -48,42 +62,32 @@ class FlameGraph {
private:
Trie _root;
char _buf[4096];
u64 _mintotal;
const char* _title;
int _maxdepth;
int _imagewidth;
int _imageheight;
int _frameheight;
Counter _counter;
double _minwidth;
double _scale;
double _pct;
bool _reverse;
void printHeader(std::ostream& out);
void printFooter(std::ostream& out);
double printFrame(std::ostream& out, const std::string& name, const Trie& f, double x, double y);
int selectFrameColor(std::string& name);
void printFrame(std::ostream& out, const std::string& name, const Trie& f, int level, u64 x);
void printTreeFrame(std::ostream& out, const Trie& f, int level);
int frameType(std::string& name);
public:
FlameGraph(const char* title, int width, int height, double minwidth, bool reverse) :
FlameGraph(const char* title, Counter counter, double minwidth, bool reverse) :
_root(),
_maxdepth(0),
_title(title),
_imagewidth(width),
_frameheight(height),
_counter(counter),
_minwidth(minwidth),
_reverse(reverse) {
_buf[sizeof(_buf) - 1] = 0;
}
Trie* root() {
return &_root;
}
void depth(int d) {
if (d > _maxdepth) _maxdepth = d;
}
void dump(std::ostream& out);
void dump(std::ostream& out, bool tree);
};
#endif // _FLAMEGRAPH_H

995
src/flightRecorder.cpp Normal file
View File

@@ -0,0 +1,995 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <map>
#include <string>
#include <arpa/inet.h>
#include <cxxabi.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
#include "flightRecorder.h"
#include "jfrMetadata.h"
#include "dictionary.h"
#include "os.h"
#include "profiler.h"
#include "threadFilter.h"
#include "vmStructs.h"
// Size of a Buffer object in bytes, including its offset field
const int BUFFER_SIZE = 1024;
// A small buffer is flushed once its offset exceeds this (128 bytes of headroom)
const int BUFFER_LIMIT = BUFFER_SIZE - 128;
// Size of a RecordingBuffer object in bytes
const int RECORDING_BUFFER_SIZE = 65536;
// A recording buffer is flushed once its offset reaches this (4096 bytes of headroom)
const int RECORDING_BUFFER_LIMIT = RECORDING_BUFFER_SIZE - 4096;
// Limit applied to strings written with Buffer::putUtf8(const char*)
const int MAX_STRING_LENGTH = 16383;
// Setting values written by writeSettings(), indexed by the corresponding Arguments field
static const char* const SETTING_COUNTER[] = {"samples", "total"};
static const char* const SETTING_RING[] = {NULL, "kernel", "user"};
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "lbr"};
// Frame type constants; writeFrameTypes() emits each value with its display name.
enum FrameTypeId {
FRAME_INTERPRETED = 0,
FRAME_JIT_COMPILED = 1,
FRAME_INLINED = 2,
FRAME_NATIVE = 3,
FRAME_CPP = 4,
FRAME_KERNEL = 5,
};
// One CPU time reading: `real` is the value returned by the OS::get*CpuTime call,
// `user` and `system` are filled through its output parameters.
struct CpuTime {
u64 real;
u64 user;
u64 system;
};
// Pair of readings taken together: `proc` from OS::getProcessCpuTime,
// `total` from OS::getTotalCpuTime.
struct CpuTimes {
CpuTime proc;
CpuTime total;
};
// Cached description of one method. A _key of 0 marks an entry that has not
// been filled in yet; the constructor initializes only _key.
class MethodInfo {
  public:
    MethodInfo() : _key(0) {
    }

    u32 _key;
    u32 _class;
    u32 _name;
    u32 _sig;
    jint _modifiers;
    jint _line_number_table_size;
    jvmtiLineNumberEntry* _line_number_table;
    FrameTypeId _type;

    // Returns the line number of the last table entry (scanning from the
    // start) whose start_location does not exceed `bci`; the first entry is
    // used when `bci` precedes all others. Returns 0 for an empty table.
    jint getLineNumber(jint bci) {
        if (_line_number_table_size == 0) {
            return 0;
        }

        int match = 0;
        for (int next = 1; next < _line_number_table_size; next++) {
            if (bci < _line_number_table[next].start_location) {
                break;
            }
            match = next;
        }
        return _line_number_table[match].line_number;
    }
};
class Buffer {
private:
int _offset;
char _data[BUFFER_SIZE - sizeof(int)];
public:
Buffer() : _offset(0) {
}
const char* data() const {
return _data;
}
int offset() const {
return _offset;
}
int skip(int delta) {
int offset = _offset;
_offset = offset + delta;
return offset;
}
void reset() {
_offset = 0;
}
void put(const char* v, int len) {
memcpy(_data + _offset, v, len);
_offset += len;
}
void put8(char v) {
_data[_offset++] = v;
}
void put16(short v) {
*(short*)(_data + _offset) = htons(v);
_offset += 2;
}
void put32(int v) {
*(int*)(_data + _offset) = htonl(v);
_offset += 4;
}
void put64(u64 v) {
*(u64*)(_data + _offset) = OS::hton64(v);
_offset += 8;
}
void putFloat(float v) {
union {
float f;
int i;
} u;
u.f = v;
put32(u.i);
}
void putVarint(u64 v) {
char b = v;
while ((v >>= 7) != 0) {
_data[_offset++] = b | 0x80;
b = v;
}
_data[_offset++] = b;
}
void putUtf8(const char* v) {
if (v == NULL) {
put8(0);
} else {
putUtf8(v, strlen(v) & MAX_STRING_LENGTH);
}
}
void putUtf8(const char* v, int len) {
put8(3);
putVarint(len);
put(v, len);
}
void put8(int offset, char v) {
_data[offset] = v;
}
void putVar32(int offset, u32 v) {
_data[offset] = v | 0x80;
_data[offset + 1] = (v >> 7) | 0x80;
_data[offset + 2] = (v >> 14) | 0x80;
_data[offset + 3] = (v >> 21) | 0x80;
_data[offset + 4] = (v >> 28);
}
};
// Buffer enlarged to RECORDING_BUFFER_SIZE bytes in total.
// _buf is not referenced by name anywhere in the visible code.
// NOTE(review): presumably it extends the storage that follows Buffer::_data, so that
// writes past the base data area land here - confirm the layout assumption
class RecordingBuffer : public Buffer {
private:
char _buf[RECORDING_BUFFER_SIZE - sizeof(Buffer)];
public:
RecordingBuffer() : Buffer() {
}
};
class Recording {
private:
static SpinLock _cpu_monitor_lock;
static char* _agent_properties;
static char* _jvm_args;
static char* _jvm_flags;
static char* _java_command;
RecordingBuffer _buf[CONCURRENCY_LEVEL];
int _fd;
off_t _file_offset;
ThreadFilter _thread_set;
Dictionary _packages;
Dictionary _symbols;
std::map<jmethodID, MethodInfo> _method_map;
u64 _start_time;
u64 _start_nanos;
u64 _stop_time;
u64 _stop_nanos;
int _tid;
int _available_processors;
Buffer _cpu_monitor_buf;
Timer* _cpu_monitor;
CpuTimes _last_times;
// Takes the baseline CPU time readings, starts a timer that invokes
// cpuMonitorCallback with this recording (interval argument 1000000000),
// and releases _cpu_monitor_lock so the callback may run.
// NOTE(review): unlock() without a matching lock() here implies the lock is held
// whenever no monitor is running - confirm the initial state of the lock
void startCpuMonitor() {
_last_times.proc.real = OS::getProcessCpuTime(&_last_times.proc.user, &_last_times.proc.system);
_last_times.total.real = OS::getTotalCpuTime(&_last_times.total.user, &_last_times.total.system);
_cpu_monitor = OS::startTimer(1000000000, cpuMonitorCallback, this);
_cpu_monitor_lock.unlock();
}
// Acquires _cpu_monitor_lock (callbacks use tryLock, so they become no-ops from
// here on) and stops the timer. The lock is left held on return.
void stopCpuMonitor() {
_cpu_monitor_lock.lock();
OS::stopTimer(_cpu_monitor);
}
// Takes a new CPU time reading, converts the difference to the previous reading
// into load ratios clamped to [0, 1], records them in _cpu_monitor_buf and
// flushes that buffer when it passes BUFFER_LIMIT.
void cpuMonitorCycle() {
CpuTimes times;
times.proc.real = OS::getProcessCpuTime(&times.proc.user, &times.proc.system);
times.total.real = OS::getTotalCpuTime(&times.total.user, &times.total.system);
float proc_user = 0, proc_system = 0, machine_total = 0;
// A reading of (u64)-1 or one that did not advance leaves the ratios at 0
if (times.proc.real != (u64)-1 && times.proc.real > _last_times.proc.real) {
// Process load is scaled by the number of available processors
float delta = (times.proc.real - _last_times.proc.real) * _available_processors;
proc_user = ratio((times.proc.user - _last_times.proc.user) / delta);
proc_system = ratio((times.proc.system - _last_times.proc.system) / delta);
}
if (times.total.real != (u64)-1 && times.total.real > _last_times.total.real) {
float delta = times.total.real - _last_times.total.real;
machine_total = ratio(((times.total.user + times.total.system) -
(_last_times.total.user + _last_times.total.system)) / delta);
// Machine load is never reported lower than the load of this process
if (machine_total < proc_user + proc_system) {
machine_total = ratio(proc_user + proc_system);
}
}
recordCpuLoad(&_cpu_monitor_buf, proc_user, proc_system, machine_total);
if (_cpu_monitor_buf.offset() > BUFFER_LIMIT) {
flush(&_cpu_monitor_buf);
}
_last_times = times;
}
static void cpuMonitorCallback(void* arg) {
if (_cpu_monitor_lock.tryLock()) {
((Recording*)arg)->cpuMonitorCycle();
_cpu_monitor_lock.unlock();
}
}
// Clamps `value` to the range [0, 1]
static float ratio(float value) {
    if (value < 0) {
        return 0;
    }
    if (value > 1) {
        return 1;
    }
    return value;
}
public:
// Starts a recording on the already opened file descriptor `fd`: captures the
// start timestamps, registers the calling thread, writes the chunk header,
// metadata and the initial informational events through the first buffer,
// flushes them to the file and starts the CPU monitor.
Recording(int fd, Arguments& args) : _fd(fd), _thread_set(), _packages(), _symbols(), _method_map() {
// Also moves the file position to the end of the file
_file_offset = lseek(_fd, 0, SEEK_END);
_start_time = OS::millis();
_start_nanos = OS::nanotime();
_tid = OS::threadId();
addThread(_tid);
VM::jvmti()->GetAvailableProcessors(&_available_processors);
// `_buf` decays to a pointer to the first RecordingBuffer
writeHeader(_buf);
writeMetadata(_buf);
writeRecordingInfo(_buf);
writeSettings(_buf, args);
writeOsCpuInfo(_buf);
writeJvmInfo(_buf);
writeSystemProperties(_buf);
flush(_buf);
startCpuMonitor();
}
// Finishes the recording: stops the CPU monitor, flushes all buffers, appends
// the constant pool, patches the size fields that were left open and closes
// the file descriptor.
~Recording() {
stopCpuMonitor();
_stop_nanos = OS::nanotime();
_stop_time = OS::millis();
for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
flush(&_buf[i]);
}
flush(&_cpu_monitor_buf);
// Current file position = where the constant pool event starts
off_t cpool_offset = lseek(_fd, 0, SEEK_CUR);
writeCpool(_buf);
flush(_buf);
off_t chunk_size = lseek(_fd, 0, SEEK_CUR);
// Patch checkpoint size field
// (the 5 bytes reserved by skip(5) at the start of writeCpool)
_buf->putVar32(0, chunk_size - cpool_offset);
ssize_t result = pwrite(_fd, _buf->data(), 5, cpool_offset);
(void)result;
// Patch chunk header
// The buffer was reset by flush(), so these five values occupy its first 40 bytes.
// They overwrite the header fields at file offset 8: chunk size, cpool offset,
// meta offset, start time in ns and duration in ns (see writeHeader).
_buf->put64(chunk_size);
_buf->put64(cpool_offset);
// 68 is the number of bytes written by writeHeader()
_buf->put64(68);
_buf->put64(_start_time * 1000000);
_buf->put64(_stop_nanos - _start_nanos);
result = pwrite(_fd, _buf->data(), 40, 8);
(void)result;
close(_fd);
}
// Returns the recording buffer at position `lock_index`; the index is not range-checked
Buffer* buffer(int lock_index) {
    return _buf + lock_index;
}
// Fills `mi` for a non-Java frame identified by the symbol `name`.
// Three cases are distinguished:
//   - a symbol starting with "_Z" that demangles successfully: FRAME_CPP,
//     named by the demangled text up to the first '('
//   - a symbol ending in "_[k]": FRAME_KERNEL, named without that suffix
//   - anything else: FRAME_NATIVE, named as is
void fillNativeMethodInfo(MethodInfo* mi, const char* name) {
    mi->_class = Profiler::_instance.classMap()->lookup("");
    mi->_modifiers = 0x100;
    mi->_line_number_table_size = 0;
    mi->_line_number_table = NULL;

    const bool looks_mangled = name[0] == '_' && name[1] == 'Z';
    if (looks_mangled) {
        int status;
        char* demangled = abi::__cxa_demangle(name, NULL, NULL, &status);
        if (demangled != NULL) {
            // Cut off the parameter list
            char* paren = strchr(demangled, '(');
            if (paren != NULL) {
                *paren = 0;
            }
            mi->_name = _symbols.lookup(demangled);
            mi->_sig = _symbols.lookup("()L;");
            mi->_type = FRAME_CPP;
            free(demangled);
            return;
        }
    }

    const size_t len = strlen(name);
    const bool is_kernel = len >= 4 && strcmp(name + len - 4, "_[k]") == 0;
    if (!is_kernel) {
        mi->_name = _symbols.lookup(name);
        mi->_sig = _symbols.lookup("()L;");
        mi->_type = FRAME_NATIVE;
        return;
    }

    mi->_name = _symbols.lookup(name, len - 4);
    mi->_sig = _symbols.lookup("(Lk;)L;");
    mi->_type = FRAME_KERNEL;
}
// Fills `mi` for the Java method `method` using JVMTI: class, name, signature,
// modifiers and line number table. If the class or method name cannot be
// obtained, the entry is named "jvmtiError" with an empty class.
// The type is always set to FRAME_INTERPRETED here.
void fillJavaMethodInfo(MethodInfo* mi, jmethodID method) {
    jvmtiEnv* jvmti = VM::jvmti();

    // Initialized so that it can be released safely when the first call fails
    jclass method_class = NULL;
    char* class_name = NULL;
    char* method_name = NULL;
    char* method_sig = NULL;

    if (jvmti->GetMethodDeclaringClass(method, &method_class) == 0 &&
        jvmti->GetClassSignature(method_class, &class_name, NULL) == 0 &&
        jvmti->GetMethodName(method, &method_name, &method_sig, NULL) == 0) {
        // Strip the first and the last character of the class signature
        mi->_class = Profiler::_instance.classMap()->lookup(class_name + 1, strlen(class_name) - 2);
        mi->_name = _symbols.lookup(method_name);
        mi->_sig = _symbols.lookup(method_sig);
    } else {
        mi->_class = Profiler::_instance.classMap()->lookup("");
        mi->_name = _symbols.lookup("jvmtiError");
        mi->_sig = _symbols.lookup("()L;");
    }

    jvmti->Deallocate((unsigned char*)method_sig);
    jvmti->Deallocate((unsigned char*)method_name);
    jvmti->Deallocate((unsigned char*)class_name);

    // The declaring class is returned as a JNI local reference; release it so
    // that resolving many methods does not pile up local references
    if (method_class != NULL) {
        JNIEnv* env = VM::jni();
        if (env != NULL) {
            env->DeleteLocalRef(method_class);
        }
    }

    if (jvmti->GetMethodModifiers(method, &mi->_modifiers) != 0) {
        mi->_modifiers = 0;
    }

    if (jvmti->GetLineNumberTable(method, &mi->_line_number_table_size, &mi->_line_number_table) != 0) {
        mi->_line_number_table_size = 0;
        mi->_line_number_table = NULL;
    }

    mi->_type = FRAME_INTERPRETED;
}
// Returns the cached MethodInfo for the frame's method, creating and filling
// it on first use. A new entry receives the current map size as its key.
// A NULL method id is recorded as the native method "unknown"; frames with
// bci BCI_NATIVE_FRAME or BCI_ERROR carry a symbol name in place of the id.
MethodInfo* resolveMethod(ASGCT_CallFrame& frame) {
    jmethodID method = frame.method_id;
    MethodInfo* mi = &_method_map[method];
    if (mi->_key != 0) {
        return mi;
    }

    mi->_key = _method_map.size();

    const bool name_in_method_id = frame.bci == BCI_NATIVE_FRAME || frame.bci == BCI_ERROR;
    if (method == NULL) {
        fillNativeMethodInfo(mi, "unknown");
    } else if (name_in_method_id) {
        fillNativeMethodInfo(mi, (const char*)method);
    } else {
        fillJavaMethodInfo(mi, method);
    }
    return mi;
}
// Returns the dictionary index of the package part of `class_name` (everything
// before the last '/'), or 0 when the name contains no '/'.
// For names starting with '[', the package is taken from after the first 'L'.
u32 getPackage(const char* class_name) {
    const char* last_slash = strrchr(class_name, '/');
    if (last_slash == NULL) {
        return 0;
    }

    const char* start = class_name;
    if (start[0] == '[') {
        start = strchr(start, 'L') + 1;
    }
    return _packages.lookup(start, last_slash - start);
}
// Obtains the agent properties as text by calling VMSupport.getAgentProperties()
// followed by toString(), then extracts sun.jvm.args, sun.jvm.flags and
// sun.java.command from it. The text is split in place: _jvm_args, _jvm_flags
// and _java_command point into _agent_properties, which is kept for later use.
// Returns false when the properties could not be obtained.
bool parseAgentProperties() {
    JNIEnv* env = VM::jni();
    jclass vm_support = env->FindClass("jdk/internal/vm/VMSupport");
    if (vm_support == NULL) {
        // A failed FindClass leaves an exception pending; it has to be cleared
        // before FindClass may be called again
        env->ExceptionClear();
        vm_support = env->FindClass("sun/misc/VMSupport");
    }
    if (vm_support != NULL) {
        jmethodID get_agent_props = env->GetStaticMethodID(vm_support, "getAgentProperties", "()Ljava/util/Properties;");
        jmethodID to_string = env->GetMethodID(env->FindClass("java/lang/Object"), "toString", "()Ljava/lang/String;");
        if (get_agent_props != NULL && to_string != NULL) {
            jobject props = env->CallStaticObjectMethod(vm_support, get_agent_props);
            if (props != NULL) {
                jstring str = (jstring)env->CallObjectMethod(props, to_string);
                if (str != NULL) {
                    // The characters are deliberately not released: the pointers
                    // stored below keep referring to this memory
                    _agent_properties = (char*)env->GetStringUTFChars(str, NULL);
                }
            }
        }
    }
    env->ExceptionClear();

    if (_agent_properties == NULL) {
        return false;
    }

    // Skip the first character and cut off the last one
    // (presumably the braces that surround the toString() output)
    char* p = _agent_properties + 1;
    p[strlen(p) - 1] = 0;

    // Entries are separated by ", "; each separator is replaced by a terminator
    while (*p) {
        if (strncmp(p, "sun.jvm.args=", 13) == 0) {
            _jvm_args = p + 13;
        } else if (strncmp(p, "sun.jvm.flags=", 14) == 0) {
            _jvm_flags = p + 14;
        } else if (strncmp(p, "sun.java.command=", 17) == 0) {
            _java_command = p + 17;
        }

        if ((p = strstr(p, ", ")) == NULL) {
            break;
        }

        *p = 0;
        p += 2;
    }

    return true;
}
// Writes the content of `buf` to the recording file and resets the buffer.
// write() may accept fewer bytes than requested, so the remainder is written
// in a loop. A failing write ends the loop; the unwritten data is dropped,
// since there is no way to report the error from here.
void flush(Buffer* buf) {
    const char* data = buf->data();
    ssize_t remaining = buf->offset();
    while (remaining > 0) {
        ssize_t written = write(_fd, data, remaining);
        if (written <= 0) {
            break;
        }
        data += written;
        remaining -= written;
    }
    buf->reset();
}
// Flushes `buf` once its offset has reached RECORDING_BUFFER_LIMIT
void flushIfNeeded(Buffer* buf) {
    if (buf->offset() < RECORDING_BUFFER_LIMIT) {
        return;
    }
    flush(buf);
}
// Writes the 68-byte chunk header (4 + 2 + 2 + 7 * 8 + 4 bytes).
// Chunk size, cpool offset, meta offset and duration are written as 0 here;
// the destructor overwrites the 40 bytes starting at offset 8 with final values.
void writeHeader(Buffer* buf) {
buf->put("FLR\0", 4); // magic
buf->put16(2); // major
buf->put16(0); // minor
buf->put64(0); // chunk size
buf->put64(0); // cpool offset
buf->put64(0); // meta offset
buf->put64(_start_time * 1000000); // start time, ns
buf->put64(0); // duration, ns
buf->put64(_start_nanos); // start ticks
buf->put64(1000000000); // ticks per sec
buf->put32(1); // features
}
// Writes the metadata event: a 5-byte size placeholder, the event header,
// the string table from JfrMetadata::strings() and the element tree starting
// at JfrMetadata::root(). The size is patched in once the event is complete.
void writeMetadata(Buffer* buf) {
    const int event_start = buf->skip(5);  // size will be patched later
    buf->putVarint(T_METADATA);
    buf->putVarint(_start_nanos);
    buf->putVarint(0);
    buf->putVarint(1);

    std::vector<std::string>& table = JfrMetadata::strings();
    buf->putVarint(table.size());
    for (size_t idx = 0; idx < table.size(); idx++) {
        buf->putUtf8(table[idx].c_str());
    }

    writeElement(buf, JfrMetadata::root());

    const int event_size = buf->offset() - event_start;
    buf->putVar32(event_start, event_size);
}
// Serializes one metadata element depth-first: its name, the number of
// attributes followed by each key/value pair, then the number of children
// followed by each child element.
void writeElement(Buffer* buf, const Element* e) {
    buf->putVarint(e->_name);

    const size_t attribute_count = e->_attributes.size();
    buf->putVarint(attribute_count);
    for (size_t a = 0; a < attribute_count; a++) {
        buf->putVarint(e->_attributes[a]._key);
        buf->putVarint(e->_attributes[a]._value);
    }

    const size_t child_count = e->_children.size();
    buf->putVarint(child_count);
    for (size_t c = 0; c < child_count; c++) {
        writeElement(buf, e->_children[c]);
    }
}
// Writes the T_ACTIVE_RECORDING event. Unlike most events in this file, its
// size is stored in a single byte (skip(1) ... put8), so the whole event
// has to stay below 128 bytes for the size to be a valid one-byte varint.
void writeRecordingInfo(Buffer* buf) {
int start = buf->skip(1);
buf->put8(T_ACTIVE_RECORDING);
buf->putVarint(_start_nanos);
buf->putVarint(0);
buf->putVarint(_tid);
buf->putVarint(1);
buf->putUtf8("async-profiler " PROFILER_VERSION);
buf->putUtf8("async-profiler.jfr");
buf->putVarint(0x7fffffffffffffffULL);
buf->putVarint(0);
buf->putVarint(_start_time);
buf->putVarint(0x7fffffffffffffffULL);
// Patch the one-byte size reserved at the start
buf->put8(start, buf->offset() - start);
}
// Records the profiler arguments as a series of setting events.
// General options are filed under T_ACTIVE_RECORDING; the "enabled" flags
// (and the sampling interval) are filed under the event type they control.
void writeSettings(Buffer* buf, Arguments& args) {
writeStringSetting(buf, T_ACTIVE_RECORDING, "version", PROFILER_VERSION);
// Enum-like arguments are translated to text through the SETTING_* tables
writeStringSetting(buf, T_ACTIVE_RECORDING, "counter", SETTING_COUNTER[args._counter]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "ring", SETTING_RING[args._ring]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "cstack", SETTING_CSTACK[args._cstack]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "event", args._event_desc);
writeStringSetting(buf, T_ACTIVE_RECORDING, "filter", args._filter);
writeStringSetting(buf, T_ACTIVE_RECORDING, "begin", args._begin);
writeStringSetting(buf, T_ACTIVE_RECORDING, "end", args._end);
// include/exclude are lists: one setting event is written per element
writeListSetting(buf, T_ACTIVE_RECORDING, "include", args._buf, args._include);
writeListSetting(buf, T_ACTIVE_RECORDING, "exclude", args._buf, args._exclude);
writeIntSetting(buf, T_ACTIVE_RECORDING, "jstackdepth", args._jstackdepth);
writeIntSetting(buf, T_ACTIVE_RECORDING, "safemode", args._safe_mode);
writeBoolSetting(buf, T_EXECUTION_SAMPLE, "enabled", args._events & EK_CPU);
writeIntSetting(buf, T_EXECUTION_SAMPLE, "interval", args._interval);
writeBoolSetting(buf, T_ALLOC_IN_NEW_TLAB, "enabled", args._events & EK_ALLOC);
writeBoolSetting(buf, T_ALLOC_OUTSIDE_TLAB, "enabled", args._events & EK_ALLOC);
writeBoolSetting(buf, T_MONITOR_ENTER, "enabled", args._events & EK_LOCK);
writeBoolSetting(buf, T_THREAD_PARK, "enabled", args._events & EK_LOCK);
}
// Writes one T_ACTIVE_SETTING event holding `key` = `value` for the event
// type `category`, then flushes the buffer if it is nearly full.
// A NULL `value` is written as a null string (see Buffer::putUtf8).
void writeStringSetting(Buffer* buf, int category, const char* key, const char* value) {
// Reserve 5 bytes for the event size, patched at the end
int start = buf->skip(5);
buf->put8(T_ACTIVE_SETTING);
buf->putVarint(_start_nanos);
buf->putVarint(0);
buf->putVarint(_tid);
buf->putVarint(category);
buf->putUtf8(key);
buf->putUtf8(value);
buf->putVar32(start, buf->offset() - start);
flushIfNeeded(buf);
}
// Writes a setting whose value is the text "true" or "false"
void writeBoolSetting(Buffer* buf, int category, const char* key, bool value) {
    const char* text = "false";
    if (value) {
        text = "true";
    }
    writeStringSetting(buf, category, key, text);
}
// Writes a setting whose value is the decimal text of `value`
void writeIntSetting(Buffer* buf, int category, const char* key, long value) {
    // 32 bytes hold any 64-bit decimal number with sign and terminator
    char decimal[32];
    sprintf(decimal, "%ld", value);
    writeStringSetting(buf, category, key, decimal);
}
// Writes one setting event per element of a list stored inside `base`.
// `offset` locates the text of the first element; the int stored immediately
// before each element's text is the offset of the next one, 0 ending the list.
void writeListSetting(Buffer* buf, int category, const char* key, const char* base, int offset) {
    for (int current = offset; current != 0; current = ((int*)(base + current))[-1]) {
        writeStringSetting(buf, category, key, base + current);
    }
}
// Writes a T_OS_INFORMATION event built from uname() and a T_CPU_INFORMATION
// event with the machine name, the CPU description and the processor count.
// Nothing is written when uname() fails.
void writeOsCpuInfo(Buffer* buf) {
struct utsname u;
if (uname(&u) != 0) {
return;
}
char str[512];
snprintf(str, sizeof(str) - 1, "uname: %s %s %s %s", u.sysname, u.release, u.version, u.machine);
str[sizeof(str) - 1] = 0;
int start = buf->skip(5);
buf->put8(T_OS_INFORMATION);
buf->putVarint(_start_nanos);
buf->putUtf8(str);
buf->putVar32(start, buf->offset() - start);
start = buf->skip(5);
buf->put8(T_CPU_INFORMATION);
buf->putVarint(_start_nanos);
buf->putUtf8(u.machine);
// `str` is reused for the CPU description; an empty string is written if it is unavailable
buf->putUtf8(OS::getCpuDescription(str, sizeof(str) - 1) ? str : "");
buf->putVarint(1);
// The available processor count is written for both of the last two fields
buf->putVarint(_available_processors);
buf->putVarint(_available_processors);
buf->putVar32(start, buf->offset() - start);
}
// Writes the T_JVM_INFORMATION event: VM name and version from the system
// properties, the JVM arguments, flags and Java command from the agent
// properties, the process start time and the process id.
// Nothing is written when the agent properties cannot be obtained.
void writeJvmInfo(Buffer* buf) {
// The agent properties are parsed only once and reused afterwards
if (_agent_properties == NULL && !parseAgentProperties()) {
return;
}
char* jvm_name = NULL;
char* jvm_version = NULL;
jvmtiEnv* jvmti = VM::jvmti();
// Return codes are not checked; on failure the pointers stay NULL and are written as null strings
jvmti->GetSystemProperty("java.vm.name", &jvm_name);
jvmti->GetSystemProperty("java.vm.version", &jvm_version);
int start = buf->skip(5);
buf->put8(T_JVM_INFORMATION);
buf->putVarint(_start_nanos);
buf->putUtf8(jvm_name);
buf->putUtf8(jvm_version);
buf->putUtf8(_jvm_args);
buf->putUtf8(_jvm_flags);
buf->putUtf8(_java_command);
buf->putVarint(OS::processStartTime());
buf->putVarint(OS::processId());
buf->putVar32(start, buf->offset() - start);
flushIfNeeded(buf);
jvmti->Deallocate((unsigned char*)jvm_version);
jvmti->Deallocate((unsigned char*)jvm_name);
}
// Writes one T_INITIAL_SYSTEM_PROPERTY event per JVM system property.
// Properties whose value cannot be retrieved are skipped.
// Nothing is written if the list of property keys cannot be obtained.
void writeSystemProperties(Buffer* buf) {
    jvmtiEnv* jvmti = VM::jvmti();
    jint count;
    char** keys;
    if (jvmti->GetSystemProperties(&count, &keys) != 0) {
        return;
    }

    for (int i = 0; i < count; i++) {
        char* key = keys[i];
        char* value = NULL;
        if (jvmti->GetSystemProperty(key, &value) == 0) {
            int start = buf->skip(5);  // event size, patched below
            buf->put8(T_INITIAL_SYSTEM_PROPERTY);
            buf->putVarint(_start_nanos);
            buf->putUtf8(key);
            buf->putUtf8(value);
            buf->putVar32(start, buf->offset() - start);
            flushIfNeeded(buf);
            jvmti->Deallocate((unsigned char*)value);
        }
        // Every key is a separate JVMTI allocation and must be released
        // individually, in addition to the array that holds them
        jvmti->Deallocate((unsigned char*)key);
    }

    jvmti->Deallocate((unsigned char*)keys);
}
// Writes the constant pool event: a header followed by each of the
// individual constant pools, in a fixed order.
void writeCpool(Buffer* buf) {
    buf->skip(5);  // size will be patched later

    // Header
    buf->putVarint(T_CPOOL);
    buf->putVarint(_start_nanos);
    buf->putVarint(0);
    buf->putVarint(0);
    buf->putVarint(1);

    // Number of pools; matches the eight write*() calls below
    buf->putVarint(8);

    writeFrameTypes(buf);
    writeThreadStates(buf);
    writeThreads(buf);
    writeStackTraces(buf);
    writeMethods(buf);
    writeClasses(buf);
    writePackages(buf);
    writeSymbols(buf);
}
// Writes the frame type pool: a fixed set of six (id, name) pairs.
void writeFrameTypes(Buffer* buf) {
    buf->putVarint(T_FRAME_TYPE);
    buf->putVarint(6);  // number of entries that follow

    buf->putVarint(FRAME_INTERPRETED);
    buf->putUtf8("Interpreted");

    buf->putVarint(FRAME_JIT_COMPILED);
    buf->putUtf8("JIT compiled");

    buf->putVarint(FRAME_INLINED);
    buf->putUtf8("Inlined");

    buf->putVarint(FRAME_NATIVE);
    buf->putUtf8("Native");

    buf->putVarint(FRAME_CPP);
    buf->putUtf8("C++");

    buf->putVarint(FRAME_KERNEL);
    buf->putUtf8("Kernel");
}
// Writes the thread state pool: a fixed set of two (id, name) pairs.
void writeThreadStates(Buffer* buf) {
    buf->putVarint(T_THREAD_STATE);
    buf->putVarint(2);  // number of entries that follow

    buf->putVarint(THREAD_RUNNING);
    buf->putUtf8("STATE_RUNNABLE");

    buf->putVarint(THREAD_SLEEPING);
    buf->putUtf8("STATE_SLEEPING");
}
// Writes the thread pool: one entry per thread id collected in _thread_set.
// Names and Java thread ids come from the profiler's thread maps, which are
// read under _thread_names_lock. Threads without a known name are written
// as "[tid=N]" with Java thread id 0.
void writeThreads(Buffer* buf) {
    std::vector<int> tids;
    _thread_set.collect(tids);

    MutexLocker ml(Profiler::_instance._thread_names_lock);
    std::map<int, std::string>& names = Profiler::_instance._thread_names;
    std::map<int, jlong>& java_ids = Profiler::_instance._thread_ids;
    char fallback_name[32];

    buf->putVarint(T_THREAD);
    buf->putVarint(tids.size());

    for (size_t n = 0; n < tids.size(); n++) {
        const char* thread_name;
        jlong thread_id;

        std::map<int, std::string>::const_iterator found = names.find(tids[n]);
        if (found == names.end()) {
            sprintf(fallback_name, "[tid=%d]", tids[n]);
            thread_name = fallback_name;
            thread_id = 0;
        } else {
            thread_name = found->second.c_str();
            thread_id = java_ids[tids[n]];
        }

        buf->putVarint(tids[n]);
        buf->putUtf8(thread_name);
        buf->putVarint(tids[n]);

        // The name is repeated only when a non-zero Java thread id is
        // known; otherwise a single zero byte is written in its place
        if (thread_id != 0) {
            buf->putUtf8(thread_name);
        } else {
            buf->put8(0);
        }
        buf->putVarint(thread_id);

        flushIfNeeded(buf);
    }
}
// Writes the stack trace pool: every call trace collected from the
// profiler's call trace storage, keyed by its id. Each frame is written as
// (method key, line number, bci, frame type).
void writeStackTraces(Buffer* buf) {
    typedef std::map<u32, CallTrace*> TraceMap;

    TraceMap traces;
    Profiler::_instance._call_trace_storage.collectTraces(traces);

    buf->putVarint(T_STACK_TRACE);
    buf->putVarint(traces.size());

    for (TraceMap::const_iterator entry = traces.begin(); entry != traces.end(); ++entry) {
        CallTrace* trace = entry->second;

        buf->putVarint(entry->first);
        buf->putVarint(0);  // truncated
        buf->putVarint(trace->num_frames);

        for (int f = 0; f < trace->num_frames; f++) {
            MethodInfo* mi = resolveMethod(trace->frames[f]);
            buf->putVarint(mi->_key);

            jint bci = trace->frames[f].bci;
            if (bci < 0) {
                // Negative bci: write zero for both line number and bci
                buf->put8(0);
                buf->put8(0);
            } else {
                buf->putVarint(mi->getLineNumber(bci));
                buf->putVarint(bci);
            }

            buf->putVarint(mi->_type);
            flushIfNeeded(buf);
        }

        flushIfNeeded(buf);
    }
}
// Writes the method pool from _method_map. As a side effect, the JVMTI line
// number table of each method is released once the method has been written.
void writeMethods(Buffer* buf) {
    typedef std::map<jmethodID, MethodInfo> MethodMap;

    jvmtiEnv* jvmti = VM::jvmti();

    buf->putVarint(T_METHOD);
    buf->putVarint(_method_map.size());

    for (MethodMap::const_iterator entry = _method_map.begin(); entry != _method_map.end(); ++entry) {
        const MethodInfo& method = entry->second;

        buf->putVarint(method._key);
        buf->putVarint(method._class);
        buf->putVarint(method._name);
        buf->putVarint(method._sig);
        buf->putVarint(method._modifiers);
        buf->putVarint(0);  // hidden
        flushIfNeeded(buf);

        if (method._line_number_table != NULL) {
            jvmti->Deallocate((unsigned char*)method._line_number_table);
        }
    }
}
// Writes the class pool: every class known to the profiler's class map,
// with references into the symbol and package pools.
void writeClasses(Buffer* buf) {
    typedef std::map<u32, const char*> NameMap;

    NameMap classes;
    Profiler::_instance.classMap()->collect(classes);

    buf->putVarint(T_CLASS);
    buf->putVarint(classes.size());

    for (NameMap::const_iterator entry = classes.begin(); entry != classes.end(); ++entry) {
        const char* class_name = entry->second;

        buf->putVarint(entry->first);
        buf->putVarint(0);  // classLoader
        buf->putVarint(_symbols.lookup(class_name));
        buf->putVarint(getPackage(class_name));
        buf->putVarint(0);  // access flags
        flushIfNeeded(buf);
    }
}
// Writes the package pool: each package id together with the symbol id of
// its name.
void writePackages(Buffer* buf) {
    typedef std::map<u32, const char*> NameMap;

    NameMap packages;
    _packages.collect(packages);

    buf->putVarint(T_PACKAGE);
    buf->putVarint(packages.size());

    for (NameMap::const_iterator entry = packages.begin(); entry != packages.end(); ++entry) {
        buf->putVarint(entry->first);
        buf->putVarint(_symbols.lookup(entry->second));
        flushIfNeeded(buf);
    }
}
// Writes the symbol pool: each symbol id followed by its UTF-8 text.
void writeSymbols(Buffer* buf) {
    typedef std::map<u32, const char*> NameMap;

    NameMap symbols;
    _symbols.collect(symbols);

    buf->putVarint(T_SYMBOL);
    buf->putVarint(symbols.size());

    for (NameMap::const_iterator entry = symbols.begin(); entry != symbols.end(); ++entry) {
        buf->putVarint(entry->first);
        buf->putUtf8(entry->second);
        flushIfNeeded(buf);
    }
}
// Appends a T_EXECUTION_SAMPLE event stamped with the current time.
void recordExecutionSample(Buffer* buf, int tid, u32 call_trace_id, ExecutionEvent* event) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_EXECUTION_SAMPLE);
    buf->putVarint(OS::nanotime());
    buf->putVarint(tid);
    buf->putVarint(call_trace_id);
    buf->putVarint(event->_thread_state);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Appends a T_ALLOC_IN_NEW_TLAB event stamped with the current time.
void recordAllocationInNewTLAB(Buffer* buf, int tid, u32 call_trace_id, AllocEvent* event) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_ALLOC_IN_NEW_TLAB);
    buf->putVarint(OS::nanotime());
    buf->putVarint(tid);
    buf->putVarint(call_trace_id);
    buf->putVarint(event->_class_id);
    buf->putVarint(event->_instance_size);
    buf->putVarint(event->_total_size);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Appends a T_ALLOC_OUTSIDE_TLAB event stamped with the current time.
// Unlike the in-TLAB event, no instance size is written here.
void recordAllocationOutsideTLAB(Buffer* buf, int tid, u32 call_trace_id, AllocEvent* event) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_ALLOC_OUTSIDE_TLAB);
    buf->putVarint(OS::nanotime());
    buf->putVarint(tid);
    buf->putVarint(call_trace_id);
    buf->putVarint(event->_class_id);
    buf->putVarint(event->_total_size);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Appends a T_MONITOR_ENTER event. The start time and duration are taken
// from the lock event itself rather than from the current clock.
void recordMonitorBlocked(Buffer* buf, int tid, u32 call_trace_id, LockEvent* event) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_MONITOR_ENTER);
    buf->putVarint(event->_start_time);
    buf->putVarint(event->_end_time - event->_start_time);
    buf->putVarint(tid);
    buf->putVarint(call_trace_id);
    buf->putVarint(event->_class_id);
    buf->putVarint(event->_address);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Appends a T_THREAD_PARK event. The start time and duration are taken from
// the lock event; the park timeout is written before the address.
void recordThreadPark(Buffer* buf, int tid, u32 call_trace_id, LockEvent* event) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_THREAD_PARK);
    buf->putVarint(event->_start_time);
    buf->putVarint(event->_end_time - event->_start_time);
    buf->putVarint(tid);
    buf->putVarint(call_trace_id);
    buf->putVarint(event->_class_id);
    buf->putVarint(event->_timeout);
    buf->putVarint(event->_address);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Appends a T_CPU_LOAD event stamped with the current time, carrying the
// three load values in the order given.
void recordCpuLoad(Buffer* buf, float proc_user, float proc_system, float machine_total) {
    // One byte is reserved for the event size and filled in at the end
    const int event_start = buf->skip(1);

    buf->put8(T_CPU_LOAD);
    buf->putVarint(OS::nanotime());
    buf->putFloat(proc_user);
    buf->putFloat(proc_system);
    buf->putFloat(machine_total);

    const int event_size = buf->offset() - event_start;
    buf->put8(event_start, event_size);
}
// Adds the thread id to _thread_set unless the set already accepts it.
void addThread(int tid) {
    if (_thread_set.accept(tid)) {
        return;
    }
    _thread_set.add(tid);
}
};
// Storage for the static members of Recording.
// NOTE(review): the constructor argument 1 presumably creates the spin lock
// in its locked state - confirm against SpinLock.
SpinLock Recording::_cpu_monitor_lock(1);
// JVM description strings written by writeJvmInfo(). They start as NULL;
// writeJvmInfo() calls parseAgentProperties() while _agent_properties is NULL.
char* Recording::_agent_properties = NULL;
char* Recording::_jvm_args = NULL;
char* Recording::_jvm_flags = NULL;
char* Recording::_java_command = NULL;
// Opens the output file named in args._file and starts a new recording.
// When 'reset' is true an existing file is truncated; otherwise it is
// opened without truncation. Returns an error if no file name is given or
// the file cannot be opened.
Error FlightRecorder::start(Arguments& args, bool reset) {
    const bool has_file_name = args._file != NULL && args._file[0] != 0;
    if (!has_file_name) {
        return Error("Flight Recorder output file is not specified");
    }

    int open_flags = O_CREAT | O_WRONLY;
    if (reset) {
        open_flags |= O_TRUNC;
    }

    const int fd = open(args._file, open_flags, 0644);
    if (fd == -1) {
        return Error("Cannot open Flight Recorder output file");
    }

    _rec = new Recording(fd, args);
    return Error::OK;
}
// Destroys the active recording, if any, and marks the recorder inactive.
void FlightRecorder::stop() {
    if (_rec == NULL) {
        return;
    }
    delete _rec;
    _rec = NULL;
}
// TODO: record events with call_trace_id == 0, and use stack allocated buffer if needed
//
// Serializes one profiling event into the buffer selected by 'lock_index'.
// The call is ignored when no recording is active or when call_trace_id is
// 0. Unknown event types write nothing, but the buffer is still flushed if
// needed and the thread id is still registered. 'counter' is not used here.
void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
                                 int event_type, Event* event, u64 counter) {
    if (_rec == NULL || call_trace_id == 0) {
        return;
    }

    Buffer* buf = _rec->buffer(lock_index);

    switch (event_type) {
        case 0: {
            _rec->recordExecutionSample(buf, tid, call_trace_id, (ExecutionEvent*)event);
            break;
        }
        case BCI_ALLOC: {
            _rec->recordAllocationInNewTLAB(buf, tid, call_trace_id, (AllocEvent*)event);
            break;
        }
        case BCI_ALLOC_OUTSIDE_TLAB: {
            _rec->recordAllocationOutsideTLAB(buf, tid, call_trace_id, (AllocEvent*)event);
            break;
        }
        case BCI_LOCK: {
            _rec->recordMonitorBlocked(buf, tid, call_trace_id, (LockEvent*)event);
            break;
        }
        case BCI_PARK: {
            _rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent*)event);
            break;
        }
    }

    _rec->flushIfNeeded(buf);
    _rec->addThread(tid);
}

45
src/flightRecorder.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _FLIGHTRECORDER_H
#define _FLIGHTRECORDER_H
#include "arch.h"
#include "arguments.h"
#include "event.h"
class Recording;
// Owner of the current Recording, if any. A recording exists between
// start() and stop(); events passed to recordEvent() go to that recording.
class FlightRecorder {
  private:
    // Current recording, or NULL when the recorder is not active
    Recording* _rec;

  public:
    FlightRecorder() : _rec(NULL) {}

    // Begins a recording configured by 'args'; 'reset' is forwarded to the
    // implementation
    Error start(Arguments& args, bool reset);

    // Ends the current recording, if there is one
    void stop();

    // True while a recording exists
    bool active() { return _rec != NULL; }

    // Hands one profiling event to the current recording
    void recordEvent(int lock_index, int tid, u32 call_trace_id,
                     int event_type, Event* event, u64 counter);
};
#endif // _FLIGHTRECORDER_H

194
src/frameName.cpp Executable file → Normal file
View File

@@ -19,70 +19,97 @@
#include <stdlib.h>
#include <string.h>
#include "frameName.h"
#include "profiler.h"
#include "vmStructs.h"
void FrameName::initThreadMap() {
if (!VMThread::available()) {
return;
Matcher::Matcher(const char* pattern) {
if (pattern[0] == '*') {
_type = MATCH_ENDS_WITH;
_pattern = strdup(pattern + 1);
} else {
_type = MATCH_EQUALS;
_pattern = strdup(pattern);
}
JNIEnv* env = VM::jni();
jclass threadClass = env->FindClass("java/lang/Thread");
if (threadClass == NULL) {
return;
_len = strlen(_pattern);
if (_len > 0 && _pattern[_len - 1] == '*') {
_type = _type == MATCH_EQUALS ? MATCH_STARTS_WITH : MATCH_CONTAINS;
_pattern[--_len] = 0;
}
jfieldID eetop = env->GetFieldID(threadClass, "eetop", "J");
if (eetop == NULL) {
return;
}
// Releases the pattern copy owned by this matcher (allocated with strdup).
Matcher::~Matcher() {
    free(_pattern);
}
// Copy constructor: takes a private strdup() copy of the other matcher's
// pattern so that each Matcher frees only its own string.
Matcher::Matcher(const Matcher& m) :
    _type(m._type),
    _pattern(strdup(m._pattern)),
    _len(m._len) {
}
// Copy assignment: replaces this matcher's pattern with a private copy of
// the other matcher's pattern.
//
// Self-assignment is handled explicitly: freeing _pattern before
// duplicating m._pattern would read freed memory when &m == this. The new
// copy is also made before the old pattern is released.
Matcher& Matcher::operator=(const Matcher& m) {
    if (this != &m) {
        char* pattern_copy = strdup(m._pattern);
        free(_pattern);
        _pattern = pattern_copy;
        _type = m._type;
        _len = m._len;
    }
    return *this;
}
bool Matcher::matches(const char* s) {
switch (_type) {
case MATCH_EQUALS:
return strcmp(s, _pattern) == 0;
case MATCH_CONTAINS:
return strstr(s, _pattern) != NULL;
case MATCH_STARTS_WITH:
return strncmp(s, _pattern, _len) == 0;
case MATCH_ENDS_WITH:
int slen = strlen(s);
return slen >= _len && strcmp(s + slen - _len, _pattern) == 0;
}
return false;
}
jvmtiEnv* jvmti = VM::jvmti();
jthread* thread_objects;
if (jvmti->GetAllThreads(&_thread_count, &thread_objects) != 0) {
return;
}
_threads = (ThreadId*)calloc(_thread_count, sizeof(ThreadId));
FrameName::FrameName(Arguments& args, int style, Mutex& thread_names_lock, ThreadMap& thread_names) :
_cache(),
_class_names(),
_include(),
_exclude(),
_style(style),
_thread_names_lock(thread_names_lock),
_thread_names(thread_names)
{
// Require printf to use standard C format regardless of system locale
_saved_locale = uselocale(newlocale(LC_NUMERIC_MASK, "C", (locale_t)0));
memset(_buf, 0, sizeof(_buf));
// Create a map [OS thread ID] -> [Java thread name] backed by a sorted array
for (int i = 0; i < _thread_count; i++) {
VMThread* vm_thread = (VMThread*)(uintptr_t)env->GetLongField(thread_objects[i], eetop);
jvmtiThreadInfo thread_info;
if (vm_thread != NULL && jvmti->GetThreadInfo(thread_objects[i], &thread_info) == 0) {
_threads[i]._id = vm_thread->osThreadId();
_threads[i]._name = thread_info.name;
}
}
buildFilter(_include, args._buf, args._include);
buildFilter(_exclude, args._buf, args._exclude);
qsort(_threads, _thread_count, sizeof(ThreadId), ThreadId::comparator);
jvmti->Deallocate((unsigned char*)thread_objects);
Profiler::_instance.classMap()->collect(_class_names);
}
FrameName::~FrameName() {
jvmtiEnv* jvmti = VM::jvmti();
for (int i = 0; i < _thread_count; i++) {
jvmti->Deallocate((unsigned char*)_threads[i]._name);
}
free(_threads);
freelocale(uselocale(_saved_locale));
}
const char* FrameName::findThreadName(int tid) {
int low = 0;
int high = _thread_count - 1;
while (low <= high) {
int mid = (unsigned int)(low + high) >> 1;
if (_threads[mid]._id < tid) {
low = mid + 1;
} else if (_threads[mid]._id > tid) {
high = mid - 1;
} else {
return _threads[mid]._name;
}
// Appends one Matcher per pattern of a linked list stored inside 'base'.
// 'offset' locates the first pattern string; the int placed immediately
// before each string holds the offset of the next one, and offset 0
// terminates the list.
void FrameName::buildFilter(std::vector<Matcher>& vector, const char* base, int offset) {
    for (int pos = offset; pos != 0; pos = ((int*)(base + pos))[-1]) {
        vector.push_back(base + pos);
    }
}
return NULL;
// Shortens 'name' in place to exactly max_length characters when it is
// longer than that, ending the result with "...)". Names that already fit,
// and limits below 4, leave the string untouched. Returns 'name'.
char* FrameName::truncate(char* name, int max_length) {
    if (max_length < 4 || strlen(name) <= max_length) {
        return name;
    }
    strcpy(name + max_length - 4, "...)");
    return name;
}
const char* FrameName::cppDemangle(const char* name) {
@@ -90,7 +117,7 @@ const char* FrameName::cppDemangle(const char* name) {
int status;
char* demangled = abi::__cxa_demangle(name, NULL, NULL, &status);
if (demangled != NULL) {
strncpy(_buf, demangled, sizeof(_buf));
strncpy(_buf, demangled, sizeof(_buf) - 1);
free(demangled);
return _buf;
}
@@ -102,30 +129,34 @@ char* FrameName::javaMethodName(jmethodID method) {
jclass method_class;
char* class_name = NULL;
char* method_name = NULL;
char* method_sig = NULL;
char* result;
jvmtiEnv* jvmti = VM::jvmti();
jvmtiError err;
if ((err = jvmti->GetMethodName(method, &method_name, NULL, NULL)) == 0 &&
if ((err = jvmti->GetMethodName(method, &method_name, &method_sig, NULL)) == 0 &&
(err = jvmti->GetMethodDeclaringClass(method, &method_class)) == 0 &&
(err = jvmti->GetClassSignature(method_class, &class_name, NULL)) == 0) {
// Trim 'L' and ';' off the class descriptor like 'Ljava/lang/Object;'
result = javaClassName(class_name + 1, strlen(class_name) - 2, _simple, _dotted);
result = javaClassName(class_name + 1, strlen(class_name) - 2, _style);
strcat(result, ".");
strcat(result, method_name);
if (_style & STYLE_SIGNATURES) strcat(result, truncate(method_sig, 255));
if (_style & STYLE_ANNOTATE) strcat(result, "_[j]");
} else {
snprintf(_buf, sizeof(_buf), "[jvmtiError %d]", err);
snprintf(_buf, sizeof(_buf) - 1, "[jvmtiError %d]", err);
result = _buf;
}
jvmti->Deallocate((unsigned char*)class_name);
jvmti->Deallocate((unsigned char*)method_sig);
jvmti->Deallocate((unsigned char*)method_name);
return result;
}
char* FrameName::javaClassName(const char* symbol, int length, bool simple, bool dotted) {
char* FrameName::javaClassName(const char* symbol, int length, int style) {
char* result = _buf;
int array_dimension = 0;
@@ -158,13 +189,13 @@ char* FrameName::javaClassName(const char* symbol, int length, bool simple, bool
} while (--array_dimension > 0);
}
if (simple) {
if (style & STYLE_SIMPLE) {
for (char* s = result; *s; s++) {
if (*s == '/') result = s + 1;
}
}
if (dotted) {
if (style & STYLE_DOTTED) {
for (char* s = result; *s; s++) {
if (*s == '/') *s = '.';
}
@@ -173,7 +204,7 @@ char* FrameName::javaClassName(const char* symbol, int length, bool simple, bool
return result;
}
const char* FrameName::name(ASGCT_CallFrame& frame) {
const char* FrameName::name(ASGCT_CallFrame& frame, bool for_matching) {
if (frame.method_id == NULL) {
return "[unknown]";
}
@@ -182,26 +213,34 @@ const char* FrameName::name(ASGCT_CallFrame& frame) {
case BCI_NATIVE_FRAME:
return cppDemangle((const char*)frame.method_id);
case BCI_SYMBOL: {
VMSymbol* symbol = (VMSymbol*)frame.method_id;
char* class_name = javaClassName(symbol->body(), symbol->length(), _simple, true);
return strcat(class_name, _dotted ? "" : "_[i]");
}
case BCI_SYMBOL_OUTSIDE_TLAB: {
VMSymbol* symbol = (VMSymbol*)((uintptr_t)frame.method_id ^ 1);
char* class_name = javaClassName(symbol->body(), symbol->length(), _simple, true);
return strcat(class_name, _dotted ? " (out)" : "_[k]");
case BCI_ALLOC:
case BCI_ALLOC_OUTSIDE_TLAB:
case BCI_LOCK:
case BCI_PARK: {
const char* symbol = _class_names[(uintptr_t)frame.method_id];
char* class_name = javaClassName(symbol, strlen(symbol), _style | STYLE_DOTTED);
if (!for_matching && !(_style & STYLE_DOTTED)) {
strcat(class_name, frame.bci == BCI_ALLOC_OUTSIDE_TLAB ? "_[k]" : "_[i]");
}
return class_name;
}
case BCI_THREAD_ID: {
int tid = (int)(uintptr_t)frame.method_id;
const char* name = findThreadName(tid);
if (name != NULL) {
return name;
MutexLocker ml(_thread_names_lock);
ThreadMap::iterator it = _thread_names.find(tid);
if (for_matching) {
return it != _thread_names.end() ? it->second.c_str() : "";
} else if (it != _thread_names.end()) {
snprintf(_buf, sizeof(_buf) - 1, "[%s tid=%d]", it->second.c_str(), tid);
} else {
snprintf(_buf, sizeof(_buf) - 1, "[tid=%d]", tid);
}
return _buf;
}
snprintf(_buf, sizeof(_buf), "[thread %d]", tid);
case BCI_ERROR: {
snprintf(_buf, sizeof(_buf) - 1, "[%s]", (const char*)frame.method_id);
return _buf;
}
@@ -217,3 +256,22 @@ const char* FrameName::name(ASGCT_CallFrame& frame) {
}
}
}
bool FrameName::include(const char* frame_name) {
for (int i = 0; i < _include.size(); i++) {
if (_include[i].matches(frame_name)) {
return true;
}
}
return false;
}
bool FrameName::exclude(const char* frame_name) {
for (int i = 0; i < _exclude.size(); i++) {
if (_exclude[i].matches(frame_name)) {
return true;
}
}
return false;
}

80
src/frameName.h Executable file → Normal file
View File

@@ -18,50 +18,78 @@
#define _FRAMENAME_H
#include <jvmti.h>
#include <locale.h>
#include <map>
#include <vector>
#include <string>
#include "arguments.h"
#include "mutex.h"
#include "vmEntry.h"
class ThreadId {
private:
int _id;
const char* _name;
public:
static int comparator(const void* t1, const void* t2) {
return ((ThreadId*)t1)->_id - ((ThreadId*)t2)->_id;
}
friend class FrameName;
};
#ifdef __APPLE__
# include <xlocale.h>
#endif
typedef std::map<jmethodID, std::string> JMethodCache;
typedef std::map<int, std::string> ThreadMap;
typedef std::map<unsigned int, const char*> ClassMap;
// How a Matcher compares a candidate string with its pattern.
enum MatchType {
MATCH_EQUALS,       // whole string equals the pattern
MATCH_CONTAINS,     // pattern occurs anywhere in the string
MATCH_STARTS_WITH,  // string begins with the pattern
MATCH_ENDS_WITH     // string ends with the pattern
};
// A single include/exclude filter pattern. The matcher owns a private copy
// of its pattern text, so it defines a destructor, copy constructor and
// copy assignment operator.
class Matcher {
  private:
    MatchType _type;   // comparison mode
    char* _pattern;    // owned pattern text
    int _len;          // length of _pattern

  public:
    Matcher(const char* pattern);
    ~Matcher();

    Matcher(const Matcher& m);
    Matcher& operator=(const Matcher& m);

    // Returns true if 's' satisfies the pattern
    bool matches(const char* s);
};
class FrameName {
private:
JMethodCache _cache;
char _buf[520];
bool _simple;
bool _dotted;
int _thread_count;
ThreadId* _threads;
ClassMap _class_names;
std::vector<Matcher> _include;
std::vector<Matcher> _exclude;
char _buf[800]; // must be large enough for class name + method name + method signature
int _style;
Mutex& _thread_names_lock;
ThreadMap& _thread_names;
locale_t _saved_locale;
void initThreadMap();
const char* findThreadName(int tid);
void buildFilter(std::vector<Matcher>& vector, const char* base, int offset);
char* truncate(char* name, int max_length);
const char* cppDemangle(const char* name);
char* javaMethodName(jmethodID method);
char* javaClassName(const char* symbol, int length, bool simple, bool dotted);
char* javaClassName(const char* symbol, int length, int style);
public:
FrameName(bool simple, bool dotted) : _cache(), _simple(simple), _dotted(dotted), _thread_count(0), _threads(NULL) {
initThreadMap();
}
FrameName(Arguments& args, int style, Mutex& thread_names_lock, ThreadMap& thread_names);
~FrameName();
const char* name(ASGCT_CallFrame& frame);
const char* name(ASGCT_CallFrame& frame, bool for_matching = false);
bool hasIncludeList() { return !_include.empty(); }
bool hasExcludeList() { return !_exclude.empty(); }
bool include(const char* frame_name);
bool exclude(const char* frame_name);
};
#endif // _FRAMENAME_H

593
src/instrument.cpp Normal file
View File

@@ -0,0 +1,593 @@
/*
* Copyright 2019 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <arpa/inet.h>
#include <stdlib.h>
#include <string.h>
#include "arch.h"
#include "os.h"
#include "profiler.h"
#include "vmEntry.h"
#include "instrument.h"
// A class with a single native recordSample() method
// Raw class file bytes of one/profiler/Instrument. The final byte of the
// class file is the string literal's implicit NUL terminator, which is why
// callers pass sizeof(INSTRUMENT_CLASS) as the class data length.
static const char INSTRUMENT_CLASS[] =
"\xCA\xFE\xBA\xBE" // magic
"\x00\x00\x00\x32" // version: 50
"\x00\x07" // constant_pool_count: 7
"\x07\x00\x02" // #1 = CONSTANT_Class: #2
"\x01\x00\x17one/profiler/Instrument" // #2 = CONSTANT_Utf8: "one/profiler/Instrument"
"\x07\x00\x04" // #3 = CONSTANT_Class: #4
"\x01\x00\x10java/lang/Object" // #4 = CONSTANT_Utf8: "java/lang/Object"
"\x01\x00\x0CrecordSample" // #5 = CONSTANT_Utf8: "recordSample"
"\x01\x00\x03()V" // #6 = CONSTANT_Utf8: "()V"
"\x00\x21" // access_flags: public super
"\x00\x01" // this_class: #1
"\x00\x03" // super_class: #3
"\x00\x00" // interfaces_count: 0
"\x00\x00" // fields_count: 0
"\x00\x01" // methods_count: 1
"\x01\x09" // access_flags: public static native
"\x00\x05" // name_index: #5
"\x00\x06" // descriptor_index: #6
"\x00\x00" // method attributes_count: 0
"\x00"; // class attributes_count: 0 (high byte; the low byte is the literal's NUL terminator)
// Tag byte values that identify the kind of a constant pool entry in a
// class file. Constant::length() maps each tag to the size of the data that
// follows the tag byte.
enum ConstantTag {
CONSTANT_Utf8 = 1,
CONSTANT_Integer = 3,
CONSTANT_Float = 4,
CONSTANT_Long = 5,
CONSTANT_Double = 6,
CONSTANT_Class = 7,
CONSTANT_String = 8,
CONSTANT_Fieldref = 9,
CONSTANT_Methodref = 10,
CONSTANT_InterfaceMethodref = 11,
CONSTANT_NameAndType = 12,
CONSTANT_MethodHandle = 15,
CONSTANT_MethodType = 16,
CONSTANT_Dynamic = 17,
CONSTANT_InvokeDynamic = 18,
CONSTANT_Module = 19,
CONSTANT_Package = 20
};
// View over one constant pool entry inside the raw class file bytes.
// Instances are never constructed: BytecodeRewriter casts a pointer into
// the class data to Constant*, so the members mirror the on-disk layout
// (a tag byte followed by big-endian data).
class Constant {
  private:
    u8 _tag;
    u8 _info[2];  // first two data bytes; Utf8 text continues past this array

  public:
    u8 tag() {
        return _tag;
    }

    // Number of constant pool indices taken by this entry:
    // Long and Double take two, everything else one
    int slots() {
        if (_tag == CONSTANT_Long || _tag == CONSTANT_Double) {
            return 2;
        }
        return 1;
    }

    // First two data bytes as a big-endian 16-bit value
    // (the byte length for Utf8, an index for reference-like constants)
    u16 info() {
        u16 high = _info[0];
        u16 low = _info[1];
        return high << 8 | low;
    }

    // Size of the data following the tag byte; 0 for an unknown tag
    int length() {
        switch (_tag) {
            case CONSTANT_Utf8:
                return 2 + info();

            case CONSTANT_Class:
            case CONSTANT_String:
            case CONSTANT_MethodType:
            case CONSTANT_Module:
            case CONSTANT_Package:
                return 2;

            case CONSTANT_MethodHandle:
                return 3;

            case CONSTANT_Integer:
            case CONSTANT_Float:
            case CONSTANT_Fieldref:
            case CONSTANT_Methodref:
            case CONSTANT_InterfaceMethodref:
            case CONSTANT_NameAndType:
            case CONSTANT_Dynamic:
            case CONSTANT_InvokeDynamic:
                return 4;

            case CONSTANT_Long:
            case CONSTANT_Double:
                return 8;

            default:
                return 0;
        }
    }

    // True if this is a Utf8 constant whose text is exactly 'value'
    bool equals(const char* value, u16 len) {
        if (_tag != CONSTANT_Utf8) {
            return false;
        }
        return info() == len && memcmp(_info + 2, value, len) == 0;
    }

    // Like equals(), except that a trailing '*' in 'value' turns the
    // comparison into a prefix match
    bool matches(const char* value, u16 len) {
        const bool is_prefix_pattern = len > 0 && value[len - 1] == '*';
        if (!is_prefix_pattern) {
            return equals(value, len);
        }
        return _tag == CONSTANT_Utf8 && info() >= len - 1 && memcmp(_info + 2, value, len - 1) == 0;
    }
};
// Tells BytecodeRewriter::rewriteAttributes()/rewriteMembers() which part of
// the class file is being copied, and whether it must be patched.
enum Scope {
SCOPE_CLASS,           // class-level attributes
SCOPE_FIELD,           // field table
SCOPE_METHOD,          // method table / attributes of a method left unchanged
SCOPE_REWRITE_METHOD,  // attributes of a target method: its Code attribute is rewritten
SCOPE_REWRITE_CODE     // attributes nested in a rewritten Code attribute
};
// Sizes of what BytecodeRewriter adds to a patched class.
enum PatchConstants {
EXTRA_CONSTANTS = 6,  // constant pool entries appended by rewriteClass()
EXTRA_BYTECODES = 4,  // bytes prepended to the method code: invokestatic (3 bytes) + nop
EXTRA_STACKMAPS = 1   // stack map frame prepended by rewriteStackMapTable()
};
class BytecodeRewriter {
private:
const u8* _src;
const u8* _src_limit;
u8* _dst;
int _dst_len;
int _dst_capacity;
Constant** _cpool;
u16 _cpool_len;
const char* _target_class;
u16 _target_class_len;
const char* _target_method;
u16 _target_method_len;
const char* _target_signature;
u16 _target_signature_len;
// Reader
const u8* get(int bytes) {
const u8* result = _src;
_src += bytes;
return _src <= _src_limit ? result : NULL;
}
u8 get8() {
return *get(1);
}
u16 get16() {
return ntohs(*(u16*)get(2));
}
u32 get32() {
return ntohl(*(u32*)get(4));
}
u64 get64() {
return OS::ntoh64(*(u64*)get(8));
}
Constant* getConstant() {
Constant* c = (Constant*)get(1);
get(c->length());
return c;
}
// Writer
u8* alloc(int bytes) {
if (_dst_len + bytes > _dst_capacity) {
grow(_dst_len + bytes + 2000);
}
u8* result = _dst + _dst_len;
_dst_len += bytes;
return result;
}
void grow(int new_capacity) {
u8* new_dst = NULL;
VM::jvmti()->Allocate(new_capacity, &new_dst);
memcpy(new_dst, _dst, _dst_len);
VM::jvmti()->Deallocate(_dst);
_dst = new_dst;
_dst_capacity = new_capacity;
}
void put(const u8* src, int bytes) {
memcpy(alloc(bytes), src, bytes);
}
void put8(u8 v) {
*alloc(1) = v;
}
void put16(u16 v) {
*(u16*)alloc(2) = htons(v);
}
void put32(u32 v) {
*(u32*)alloc(4) = htonl(v);
}
void put64(u64 v) {
*(u64*)alloc(8) = OS::hton64(v);
}
void putConstant(const char* value) {
u16 len = strlen(value);
put8(CONSTANT_Utf8);
put16(len);
put((const u8*)value, len);
}
void putConstant(u8 tag, u16 ref) {
put8(tag);
put16(ref);
}
void putConstant(u8 tag, u16 ref1, u16 ref2) {
put8(tag);
put16(ref1);
put16(ref2);
}
// BytecodeRewriter
void rewriteCode();
void rewriteBytecodeTable(int data_len);
void rewriteStackMapTable();
void rewriteAttributes(Scope scope);
void rewriteMembers(Scope scope);
bool rewriteClass();
public:
BytecodeRewriter(const u8* class_data, int class_data_len, const char* target_class) :
_src(class_data),
_src_limit(class_data + class_data_len),
_dst(NULL),
_dst_len(0),
_dst_capacity(class_data_len + 400),
_cpool(NULL) {
_target_class = target_class;
_target_class_len = strlen(_target_class);
_target_method = _target_class + _target_class_len + 1;
_target_signature = strchr(_target_method, '(');
if (_target_signature == NULL) {
_target_method_len = strlen(_target_method);
} else {
_target_method_len = _target_signature - _target_method;
_target_signature_len = strlen(_target_signature);
}
}
~BytecodeRewriter() {
delete[] _cpool;
}
void rewrite(u8** new_class_data, int* new_class_data_len) {
if (VM::jvmti()->Allocate(_dst_capacity, &_dst) == 0) {
if (rewriteClass()) {
*new_class_data = _dst;
*new_class_data_len = _dst_len;
} else {
VM::jvmti()->Deallocate(_dst);
}
}
}
};
void BytecodeRewriter::rewriteCode() {
u32 attribute_length = get32();
put32(attribute_length);
int code_begin = _dst_len;
u16 max_stack = get16();
put16(max_stack);
u16 max_locals = get16();
put16(max_locals);
u32 code_length = get32();
put32(code_length + EXTRA_BYTECODES);
// invokestatic "one/profiler/Instrument.recordSample()V"
// nop after invoke helps to prepend StackMapTable without rewriting
put8(0xb8);
put16(_cpool_len);
put8(0);
// The rest of the code is unchanged
put(get(code_length), code_length);
u16 exception_table_length = get16();
put16(exception_table_length);
for (int i = 0; i < exception_table_length; i++) {
u16 start_pc = get16();
u16 end_pc = get16();
u16 handler_pc = get16();
u16 catch_type = get16();
put16(EXTRA_BYTECODES + start_pc);
put16(EXTRA_BYTECODES + end_pc);
put16(EXTRA_BYTECODES + handler_pc);
put16(catch_type);
}
rewriteAttributes(SCOPE_REWRITE_CODE);
// Patch attribute length
*(u32*)(_dst + code_begin - 4) = htonl(_dst_len - code_begin);
}
void BytecodeRewriter::rewriteBytecodeTable(int data_len) {
u32 attribute_length = get32();
put32(attribute_length);
u16 table_length = get16();
put16(table_length);
for (int i = 0; i < table_length; i++) {
u16 start_pc = get16();
put16(EXTRA_BYTECODES + start_pc);
put(get(data_len), data_len);
}
}
void BytecodeRewriter::rewriteStackMapTable() {
u32 attribute_length = get32();
put32(attribute_length + EXTRA_STACKMAPS);
u16 number_of_entries = get16();
put16(number_of_entries + EXTRA_STACKMAPS);
// Prepend same_frame
put8(EXTRA_BYTECODES - 1);
put(get(attribute_length - 2), attribute_length - 2);
}
// Copies an attribute table. Depending on 'scope', selected attributes are
// handed to a specialized rewriter; all others are copied byte for byte.
//   SCOPE_REWRITE_METHOD: "Code" is rewritten
//   SCOPE_REWRITE_CODE:   "LineNumberTable", "LocalVariableTable",
//                         "LocalVariableTypeTable" and "StackMapTable"
//                         are rewritten
void BytecodeRewriter::rewriteAttributes(Scope scope) {
    const u16 attributes_count = get16();
    put16(attributes_count);

    for (int a = 0; a < attributes_count; a++) {
        const u16 name_index = get16();
        put16(name_index);

        Constant* name = _cpool[name_index];
        bool rewritten = true;

        if (scope == SCOPE_REWRITE_METHOD && name->equals("Code", 4)) {
            rewriteCode();
        } else if (scope == SCOPE_REWRITE_CODE && name->equals("LineNumberTable", 15)) {
            rewriteBytecodeTable(2);
        } else if (scope == SCOPE_REWRITE_CODE && (name->equals("LocalVariableTable", 18) ||
                                                   name->equals("LocalVariableTypeTable", 22))) {
            rewriteBytecodeTable(8);
        } else if (scope == SCOPE_REWRITE_CODE && name->equals("StackMapTable", 13)) {
            rewriteStackMapTable();
        } else {
            rewritten = false;
        }

        if (!rewritten) {
            // Not of interest: copy length and payload unchanged
            const u32 attribute_length = get32();
            put32(attribute_length);
            put(get(attribute_length), attribute_length);
        }
    }
}
// Copies the field table or the method table. In SCOPE_METHOD, a member
// whose name matches the target method (and whose descriptor matches the
// target signature, when one was given) has its attributes rewritten; all
// other members are copied without code changes.
void BytecodeRewriter::rewriteMembers(Scope scope) {
    const u16 members_count = get16();
    put16(members_count);

    for (int m = 0; m < members_count; m++) {
        put16(get16());  // access_flags

        const u16 name_index = get16();
        put16(name_index);

        const u16 descriptor_index = get16();
        put16(descriptor_index);

        // The constant pool is consulted only for methods
        Scope attributes_scope = SCOPE_METHOD;
        if (scope == SCOPE_METHOD && _cpool[name_index]->matches(_target_method, _target_method_len)) {
            if (_target_signature == NULL ||
                _cpool[descriptor_index]->matches(_target_signature, _target_signature_len)) {
                attributes_scope = SCOPE_REWRITE_METHOD;
            }
        }

        rewriteAttributes(attributes_scope);
    }
}
bool BytecodeRewriter::rewriteClass() {
u32 magic = get32();
put32(magic);
u32 version = get32();
put32(version);
_cpool_len = get16();
put16(_cpool_len + EXTRA_CONSTANTS);
const u8* cpool_start = _src;
_cpool = new Constant*[_cpool_len];
for (int i = 1; i < _cpool_len; i += _cpool[i]->slots()) {
_cpool[i] = getConstant();
}
const u8* cpool_end = _src;
put(cpool_start, cpool_end - cpool_start);
putConstant(CONSTANT_Methodref, _cpool_len + 1, _cpool_len + 2);
putConstant(CONSTANT_Class, _cpool_len + 3);
putConstant(CONSTANT_NameAndType, _cpool_len + 4, _cpool_len + 5);
putConstant("one/profiler/Instrument");
putConstant("recordSample");
putConstant("()V");
u16 access_flags = get16();
put16(access_flags);
u16 this_class = get16();
put16(this_class);
u16 class_name_index = _cpool[this_class]->info();
if (!_cpool[class_name_index]->equals(_target_class, _target_class_len)) {
return false;
}
u16 super_class = get16();
put16(super_class);
u16 interfaces_count = get16();
put16(interfaces_count);
put(get(interfaces_count * 2), interfaces_count * 2);
rewriteMembers(SCOPE_FIELD);
rewriteMembers(SCOPE_METHOD);
rewriteAttributes(SCOPE_CLASS);
return true;
}
// Storage for the static members of Instrument.
// Target class name in internal form, set by setupTargetClassAndMethod();
// the method part follows the class name's NUL terminator in the same buffer.
char* Instrument::_target_class = NULL;
// Set once check() has defined the helper class and registered its native method
bool Instrument::_instrument_class_loaded = false;
// recordSample() records one sample per _interval calls
u64 Instrument::_interval;
// Call counter incremented atomically by recordSample()
volatile u64 Instrument::_calls;
// True between start() and stop(); ClassFileLoadHook does nothing while false
volatile bool Instrument::_running;
// Makes sure the one/profiler/Instrument helper class is defined in the JVM
// and that its native recordSample() method is bound. The work is done only
// once; later calls return OK immediately. 'args' is not used.
Error Instrument::check(Arguments& args) {
    if (_instrument_class_loaded) {
        return Error::OK;
    }

    JNIEnv* jni = VM::jni();
    const JNINativeMethod native_method = {(char*)"recordSample", (char*)"()V", (void*)recordSample};

    jclass cls = jni->DefineClass(NULL, NULL, (const jbyte*)INSTRUMENT_CLASS, sizeof(INSTRUMENT_CLASS));
    const bool failed = cls == NULL || jni->RegisterNatives(cls, &native_method, 1) != 0;
    if (failed) {
        jni->ExceptionClear();
        return Error("Could not load Instrument class");
    }

    _instrument_class_loaded = true;
    return Error::OK;
}
// Starts instrumentation of the method described by args._event_desc:
// enables the ClassFileLoadHook event and retransforms classes that are
// already loaded. An interval of 0 is treated as 1 (record every call).
Error Instrument::start(Arguments& args) {
    Error error = check(args);
    if (error) {
        return error;
    }

    if (args._interval < 0) {
        return Error("interval must be positive");
    }

    setupTargetClassAndMethod(args._event_desc);

    if (args._interval) {
        _interval = args._interval;
    } else {
        _interval = 1;
    }
    _calls = 0;
    _running = true;

    jvmtiEnv* jvmti = VM::jvmti();
    jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL);
    retransformMatchedClasses(jvmti);

    return Error::OK;
}
void Instrument::stop() {
_running = false;
jvmtiEnv* jvmti = VM::jvmti();
retransformMatchedClasses(jvmti); // undo transformation
jvmti->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL);
}
// Derives the target class name from an event descriptor of the form
// "package.Class.method" and stores it in _target_class:
//  - everything after the last '.' (the method part) is cut off;
//  - remaining dots are replaced with slashes.
// A descriptor without any dot is taken as a bare class name instead of
// crashing on a NULL pointer. If the descriptor is NULL or memory cannot be
// allocated, the previous target is left untouched.
void Instrument::setupTargetClassAndMethod(const char* event) {
    char* new_class = event != NULL ? strdup(event) : NULL;
    if (new_class == NULL) {
        return;
    }

    // Strip the method name, if present
    char* method_separator = strrchr(new_class, '.');
    if (method_separator != NULL) {
        *method_separator = 0;
    }

    for (char* s = new_class; *s; s++) {
        if (*s == '.') *s = '/';
    }

    // Publish the new name before releasing the old one
    char* old_class = _target_class;
    _target_class = new_class;
    free(old_class);
}
// Asks the JVM to retransform every loaded class whose signature is exactly
// "L<_target_class>;". Matching classes are compacted to the front of the
// array returned by GetLoadedClasses, which is then passed to RetransformClasses.
// Silently does nothing if the list of loaded classes cannot be obtained.
void Instrument::retransformMatchedClasses(jvmtiEnv* jvmti) {
    jint loaded_count;
    jclass* loaded;
    if (jvmti->GetLoadedClasses(&loaded_count, &loaded) != 0) {
        return;
    }

    const size_t name_len = strlen(_target_class);
    jint matched = 0;

    for (jint i = 0; i < loaded_count; i++) {
        char* sig;
        if (jvmti->GetClassSignature(loaded[i], &sig, NULL) != 0) {
            continue;
        }

        bool is_target = sig[0] == 'L'
                      && strncmp(sig + 1, _target_class, name_len) == 0
                      && sig[name_len + 1] == ';';
        if (is_target) {
            loaded[matched++] = loaded[i];
        }

        jvmti->Deallocate((unsigned char*)sig);
    }

    if (matched > 0) {
        jvmti->RetransformClasses(matched, loaded);
        VM::jni()->ExceptionClear();
    }

    jvmti->Deallocate((unsigned char*)loaded);
}
// JVMTI ClassFileLoadHook callback. While profiling is running, passes the
// class file through BytecodeRewriter when the class name equals
// _target_class, or when the JVM supplies no name at all (the rewriter
// receives the target name as well).
// When profiling has stopped, the class data is left as is.
void JNICALL Instrument::ClassFileLoadHook(jvmtiEnv* jvmti, JNIEnv* jni,
                                           jclass class_being_redefined, jobject loader,
                                           const char* name, jobject protection_domain,
                                           jint class_data_len, const u8* class_data,
                                           jint* new_class_data_len, u8** new_class_data) {
    // Do not retransform if the profiling has stopped
    if (!_running) return;

    // A named class that is not the target is of no interest
    if (name != NULL && strcmp(name, _target_class) != 0) return;

    BytecodeRewriter rewriter(class_data, class_data_len, _target_class);
    rewriter.rewrite(new_class_data, new_class_data_len);
}
// Native implementation of the helper class' recordSample() method.
// Records a sample for every call when the interval is 1 or less; otherwise
// only for calls where the incremented counter is a multiple of _interval.
void JNICALL Instrument::recordSample(JNIEnv* jni, jobject unused) {
    if (!_enabled) return;

    // The counter is only touched when the interval is greater than 1
    bool take_sample = _interval <= 1 || ((atomicInc(_calls) + 1) % _interval) == 0;
    if (!take_sample) return;

    ExecutionEvent event;
    Profiler::_instance.recordSample(NULL, _interval, BCI_INSTRUMENT, &event);
}

62
src/instrument.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright 2019 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _INSTRUMENT_H
#define _INSTRUMENT_H
#include <jvmti.h>
#include "engine.h"
// Profiling engine that counts calls by rewriting the bytecode of a target
// class; reports its name as "instrument" and its units as "calls".
class Instrument : public Engine {
private:
// Target class name with '/' separators; replaced by setupTargetClassAndMethod()
static char* _target_class;
// Set by check() after the helper class has been defined and its native registered
static bool _instrument_class_loaded;
// Sampling period in calls
static u64 _interval;
// Call counter used by recordSample()
static volatile u64 _calls;
// True between start() and stop()
static volatile bool _running;
public:
const char* name() {
return "instrument";
}
const char* units() {
return "calls";
}
CStack cstack() {
return CSTACK_NO;
}
// Loads the helper class on first use; returns an error if that fails
Error check(Arguments& args);
// Validates arguments, enables the class file load hook and retransforms matching classes
Error start(Arguments& args);
// Retransforms matching classes again with profiling off, then disables the hook
void stop();
// Extracts the class part of an event descriptor into _target_class
void setupTargetClassAndMethod(const char* event);
// Triggers retransformation of all loaded classes named _target_class
void retransformMatchedClasses(jvmtiEnv* jvmti);
// JVMTI callback that rewrites the target class while profiling is running
static void JNICALL ClassFileLoadHook(jvmtiEnv* jvmti, JNIEnv* jni,
jclass class_being_redefined, jobject loader,
const char* name, jobject protection_domain,
jint class_data_len, const u8* class_data,
jint* new_class_data_len, u8** new_class_data);
// Native method bound to the helper class' recordSample()
static void JNICALL recordSample(JNIEnv* jni, jobject unused);
};
#endif // _INSTRUMENT_H

69
src/itimer.cpp Normal file
View File

@@ -0,0 +1,69 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include "itimer.h"
#include "os.h"
#include "profiler.h"
// Timer period in nanoseconds, set by start(); also passed to Profiler::recordSample()
long ITimer::_interval;
// SIGPROF handler: records one execution sample for the interrupted context,
// unless the engine is currently disabled.
void ITimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
    if (_enabled) {
        ExecutionEvent event;
        Profiler::_instance.recordSample(ucontext, _interval, 0, &event);
    }
}
// Probes whether ITIMER_PROF can be used: arms a 1-second timer with SIGPROF
// ignored, and disarms it again right away.
Error ITimer::check(Arguments& args) {
    // Ignore SIGPROF for the duration of the probe
    OS::installSignalHandler(SIGPROF, NULL, SIG_IGN);

    struct itimerval probe = {{1, 0}, {1, 0}};
    bool supported = setitimer(ITIMER_PROF, &probe, NULL) == 0;
    if (!supported) {
        return Error("ITIMER_PROF is not supported on this system");
    }

    struct itimerval disarm = {{0, 0}, {0, 0}};
    setitimer(ITIMER_PROF, &disarm, NULL);

    return Error::OK;
}
// Starts periodic SIGPROF delivery via ITIMER_PROF.
// args._interval is the period in nanoseconds; 0 selects DEFAULT_INTERVAL,
// negative values are rejected. Returns an error if the timer cannot be set.
Error ITimer::start(Arguments& args) {
    if (args._interval < 0) {
        return Error("interval must be positive");
    }
    _interval = args._interval ? args._interval : DEFAULT_INTERVAL;

    OS::installSignalHandler(SIGPROF, signalHandler);

    long sec = _interval / 1000000000;
    long usec = (_interval % 1000000000) / 1000;
    if (sec == 0 && usec == 0) {
        // itimerval has microsecond resolution, and an all-zero value disarms
        // the timer instead of starting it; round sub-microsecond intervals up
        usec = 1;
    }

    struct itimerval tv = {{sec, usec}, {sec, usec}};
    if (setitimer(ITIMER_PROF, &tv, NULL) != 0) {
        return Error("ITIMER_PROF is not supported on this system");
    }

    return Error::OK;
}
// Disarms the profiling timer by setting both its value and interval to zero.
void ITimer::stop() {
    struct itimerval disarm = {{0, 0}, {0, 0}};
    setitimer(ITIMER_PROF, &disarm, NULL);
}

44
src/itimer.h Normal file
View File

@@ -0,0 +1,44 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _ITIMER_H
#define _ITIMER_H
#include <signal.h>
#include "engine.h"
// Profiling engine driven by the ITIMER_PROF interval timer and SIGPROF;
// reports its name as "itimer" and its units as "ns".
class ITimer : public Engine {
private:
// Timer period in nanoseconds
static long _interval;
// SIGPROF handler that records a sample
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
public:
const char* name() {
return "itimer";
}
const char* units() {
return "ns";
}
// Verifies that ITIMER_PROF can be armed on this system
Error check(Arguments& args);
// Installs the signal handler and arms the timer
Error start(Arguments& args);
// Disarms the timer
void stop();
};
#endif // _ITIMER_H

View File

@@ -22,6 +22,7 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/syscall.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -29,35 +30,221 @@
#include <unistd.h>
#define MAX_PATH 1024
#define TMP_PATH (MAX_PATH - 64)
static char tmp_path[TMP_PATH] = {0};
#ifdef __APPLE__
#ifdef __linux__
// macOS has a secure per-user temporary directory
const char* get_temp_directory() {
static char temp_path_storage[MAX_PATH] = {0};
int get_process_info(int pid, uid_t* uid, gid_t* gid, int* nspid) {
// Parse /proc/pid/status to find process credentials
char path[64];
snprintf(path, sizeof(path), "/proc/%d/status", pid);
FILE* status_file = fopen(path, "r");
if (status_file == NULL) {
return 0;
}
if (temp_path_storage[0] == 0) {
int path_size = confstr(_CS_DARWIN_USER_TEMP_DIR, temp_path_storage, MAX_PATH);
if (path_size == 0 || path_size > MAX_PATH) {
strcpy(temp_path_storage, "/tmp");
char* line = NULL;
size_t size;
while (getline(&line, &size, status_file) != -1) {
if (strncmp(line, "Uid:", 4) == 0) {
// Get the effective UID, which is the second value in the line
*uid = (uid_t)atoi(strchr(line + 5, '\t'));
} else if (strncmp(line, "Gid:", 4) == 0) {
// Get the effective GID, which is the second value in the line
*gid = (gid_t)atoi(strchr(line + 5, '\t'));
} else if (strncmp(line, "NStgid:", 7) == 0) {
// PID namespaces can be nested; the last one is the innermost one
*nspid = atoi(strrchr(line, '\t'));
}
}
return temp_path_storage;
free(line);
fclose(status_file);
return 1;
}
#else // __APPLE__
int get_tmp_path(int pid) {
// A process may have its own root path (when running in chroot environment)
char path[64];
snprintf(path, sizeof(path), "/proc/%d/root", pid);
const char* get_temp_directory() {
return "/tmp";
// Append /tmp to the resolved root symlink
ssize_t path_size = readlink(path, tmp_path, sizeof(tmp_path) - 10);
strcpy(tmp_path + (path_size > 1 ? path_size : 0), "/tmp");
return 1;
}
#endif // __APPLE__
// Switches the current process to the mount namespace of the given process.
// Returns 1 on success or when there is nothing to do (same namespace, the
// namespace files cannot be examined, or setns is unavailable at build time);
// returns 0 if the namespace file cannot be opened or setns fails.
int enter_mount_ns(int pid) {
#ifdef __NR_setns
    char path[128];
    snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);

    struct stat oldns_stat, newns_stat;
    if (stat("/proc/self/ns/mnt", &oldns_stat) != 0 || stat(path, &newns_stat) != 0) {
        return 1;
    }

    // Don't try to call setns() if we're in the same namespace already
    if (oldns_stat.st_ino == newns_stat.st_ino) {
        return 1;
    }

    int newns = open(path, O_RDONLY);
    if (newns < 0) {
        return 0;
    }

    // Some ancient Linux distributions do not have setns() function
    int result = syscall(__NR_setns, newns, 0);
    close(newns);
    return result < 0 ? 0 : 1;
#else
    return 1;
#endif // __NR_setns
}
// The first line of /proc/pid/sched looks like
// java (1234, #threads: 12)
// where 1234 is the required host PID.
//
// Returns the number that follows the last '(' on the first line of the file
// at `path`, or -1 if the file cannot be opened, is empty, or has no '('.
int sched_get_host_pid(const char* path) {
    int result = -1;

    FILE* sched_file = fopen(path, "r");
    if (sched_file == NULL) {
        return result;
    }

    // getline() requires the buffer pointer and its capacity to be consistent:
    // start from an empty buffer on every call and release it afterwards
    char* line = NULL;
    size_t size = 0;
    if (getline(&line, &size, sched_file) != -1) {
        char* c = strrchr(line, '(');
        if (c != NULL) {
            result = atoi(c + 1);
        }
    }

    free(line);
    fclose(sched_file);
    return result;
}
// Linux kernels < 4.1 do not export NStgid field in /proc/pid/status.
// Fortunately, /proc/pid/sched in a container exposes a host PID,
// so the idea is to scan all container PIDs to find which one matches the host PID.
//
// Returns the PID of the target process as seen inside its own PID namespace.
// Falls back to the host `pid` when the namespaces are the same or when no
// matching entry is found (printing a warning if the namespaces are known to differ).
int alt_lookup_nspid(int pid) {
    int namespace_differs = 0;
    char path[300];
    snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid);

    // Don't bother looking for container PID if we are already in the same PID namespace
    struct stat oldns_stat, newns_stat;
    if (stat("/proc/self/ns/pid", &oldns_stat) == 0 && stat(path, &newns_stat) == 0) {
        if (oldns_stat.st_ino == newns_stat.st_ino) {
            return pid;
        }
        namespace_differs = 1;
    }

    // Otherwise browse all PIDs in the namespace of the target process
    // trying to find which one corresponds to the host PID
    snprintf(path, sizeof(path), "/proc/%d/root/proc", pid);
    DIR* dir = opendir(path);
    if (dir != NULL) {
        struct dirent* entry;
        while ((entry = readdir(dir)) != NULL) {
            if (entry->d_name[0] >= '1' && entry->d_name[0] <= '9') {
                // Check if /proc/<container-pid>/sched points back to <host-pid>
                snprintf(path, sizeof(path), "/proc/%d/root/proc/%s/sched", pid, entry->d_name);
                if (sched_get_host_pid(path) == pid) {
                    // The dirent may become invalid once the directory stream is closed,
                    // so the name has to be parsed before closedir()
                    int nspid = atoi(entry->d_name);
                    closedir(dir);
                    return nspid;
                }
            }
        }
        closedir(dir);
    }

    if (namespace_differs) {
        printf("WARNING: couldn't find container pid of the target process\n");
    }

    return pid;
}
#elif defined(__APPLE__)
#include <sys/sysctl.h>
// macOS: obtains the credentials of the process through the kern.proc.pid sysctl.
// On success stores the uid and gid from the process credentials in *uid and
// *gid, sets *nspid to pid, and returns 1. Returns 0 if the query fails or
// returns no data.
int get_process_info(int pid, uid_t* uid, gid_t* gid, int* nspid) {
    int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};
    struct kinfo_proc info;
    size_t len = sizeof(info);

    int rc = sysctl(mib, 4, &info, &len, NULL, 0);
    if (rc < 0 || len <= 0) {
        return 0;
    }

    *nspid = pid;
    *uid = info.kp_eproc.e_ucred.cr_uid;
    *gid = info.kp_eproc.e_ucred.cr_gid;
    return 1;
}
// macOS has a secure per-user temporary directory.
// Fills tmp_path with it; returns non-zero if confstr() reported a length
// that is positive and fits into tmp_path, and 0 otherwise.
int get_tmp_path(int pid) {
    int path_size = confstr(_CS_DARWIN_USER_TEMP_DIR, tmp_path, sizeof(tmp_path));
    if (path_size <= 0) {
        return 0;
    }
    return path_size <= sizeof(tmp_path);
}
// This is a Linux-specific API; nothing to do on macOS and FreeBSD.
// The pid argument is ignored and success (1) is always reported.
int enter_mount_ns(int pid) {
return 1;
}
// Not used on macOS and FreeBSD.
// Returns the given pid unchanged.
int alt_lookup_nspid(int pid) {
return pid;
}
#else // __FreeBSD__
#include <sys/sysctl.h>
#include <sys/user.h>
// FreeBSD: obtains the credentials of the process through the kern.proc.pid sysctl.
// On success stores ki_uid in *uid and the first entry of ki_groups in *gid,
// sets *nspid to pid, and returns 1. Returns 0 if the query fails or returns
// no data.
int get_process_info(int pid, uid_t* uid, gid_t* gid, int* nspid) {
    int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};
    struct kinfo_proc info;
    size_t len = sizeof(info);

    int rc = sysctl(mib, 4, &info, &len, NULL, 0);
    if (rc < 0 || len <= 0) {
        return 0;
    }

    *nspid = pid;
    *uid = info.ki_uid;
    *gid = info.ki_groups[0];
    return 1;
}
// Use default /tmp path on FreeBSD.
// Always returns 0 and leaves tmp_path untouched.
int get_tmp_path(int pid) {
return 0;
}
// This is a Linux-specific API; nothing to do on macOS and FreeBSD.
// The pid argument is ignored and success (1) is always reported.
int enter_mount_ns(int pid) {
return 1;
}
// Not used on macOS and FreeBSD.
// Returns the given pid unchanged.
int alt_lookup_nspid(int pid) {
return pid;
}
#endif
// Check if remote JVM has already opened socket for Dynamic Attach
static int check_socket(int pid) {
char path[MAX_PATH];
snprintf(path, MAX_PATH, "%s/.java_pid%d", get_temp_directory(), pid);
snprintf(path, sizeof(path), "%s/.java_pid%d", tmp_path, pid);
struct stat stats;
return stat(path, &stats) == 0 && S_ISSOCK(stats.st_mode);
@@ -80,12 +267,12 @@ static int check_file_owner(const char* path) {
// HotSpot will start Attach listener in response to SIGQUIT if it sees .attach_pid file
static int start_attach_mechanism(int pid, int nspid) {
char path[MAX_PATH];
snprintf(path, MAX_PATH, "/proc/%d/cwd/.attach_pid%d", nspid, nspid);
snprintf(path, sizeof(path), "/proc/%d/cwd/.attach_pid%d", nspid, nspid);
int fd = creat(path, 0660);
if (fd == -1 || (close(fd) == 0 && !check_file_owner(path))) {
// Failed to create attach trigger in current directory. Retry in /tmp
snprintf(path, MAX_PATH, "%s/.attach_pid%d", get_temp_directory(), nspid);
snprintf(path, sizeof(path), "%s/.attach_pid%d", tmp_path, nspid);
fd = creat(path, 0660);
if (fd == -1) {
return 0;
@@ -96,13 +283,13 @@ static int start_attach_mechanism(int pid, int nspid) {
// We have to still use the host namespace pid here for the kill() call
kill(pid, SIGQUIT);
// Start with 20 ms sleep and increment delay each iteration
struct timespec ts = {0, 20000000};
int result;
struct timespec ts = {0, 100000000};
int retry = 0;
do {
nanosleep(&ts, NULL);
result = check_socket(nspid);
} while (!result && ++retry < 10);
} while (!result && (ts.tv_nsec += 20000000) < 300000000);
unlink(path);
return result;
@@ -117,7 +304,10 @@ static int connect_socket(int pid) {
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.java_pid%d", get_temp_directory(), pid);
int bytes = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.java_pid%d", tmp_path, pid);
if (bytes >= sizeof(addr.sun_path)) {
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
}
if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
close(fd);
@@ -164,66 +354,12 @@ static int read_response(int fd) {
return result;
}
// On Linux, get the innermost pid namespace pid for the specified host pid
static int nspid_for_pid(int pid) {
#ifdef __linux__
char status[64];
snprintf(status, sizeof(status), "/proc/%d/status", pid);
FILE* status_file = fopen(status, "r");
if (status_file != NULL) {
char* line = NULL;
size_t size;
while (getline(&line, &size, status_file) != -1) {
if (strstr(line, "NStgid:") != NULL) {
// PID namespaces can be nested; the last one is the innermost one
pid = (int)strtol(strrchr(line, '\t'), NULL, 10);
}
}
free(line);
fclose(status_file);
}
#endif
return pid;
}
static int enter_mount_ns(int pid) {
#ifdef __linux__
// We're leaking the oldns and newns descriptors, but this is a short-running
// tool, so they will be closed when the process exits anyway.
int oldns, newns;
char curnspath[128], newnspath[128];
struct stat oldns_stat, newns_stat;
snprintf(curnspath, sizeof(curnspath), "/proc/self/ns/mnt");
snprintf(newnspath, sizeof(newnspath), "/proc/%d/ns/mnt", pid);
if ((oldns = open(curnspath, O_RDONLY)) < 0 ||
((newns = open(newnspath, O_RDONLY)) < 0)) {
return 0;
}
if (fstat(oldns, &oldns_stat) < 0 || fstat(newns, &newns_stat) < 0) {
return 0;
}
if (oldns_stat.st_ino == newns_stat.st_ino) {
// Don't try to call setns() if we're in the same namespace already.
return 1;
}
// Some ancient Linux distributions do not have setns() function
return syscall(__NR_setns, newns, 0) < 0 ? 0 : 1;
#else
return 1;
#endif
}
int main(int argc, char** argv) {
if (argc < 3) {
printf("Usage: jattach <pid> <cmd> <args> ...\n");
printf("jattach " JATTACH_VERSION " built on " __DATE__ "\n"
"Copyright 2018 Andrei Pangin\n"
"\n"
"Usage: jattach <pid> <cmd> [args ...]\n");
return 1;
}
@@ -233,9 +369,40 @@ int main(int argc, char** argv) {
return 1;
}
int nspid = nspid_for_pid(pid);
if (enter_mount_ns(pid) < 0) {
fprintf(stderr, "WARNING: couldn't enter target process mnt namespace\n");
uid_t my_uid = geteuid();
gid_t my_gid = getegid();
uid_t target_uid = my_uid;
gid_t target_gid = my_gid;
int nspid = -1;
if (!get_process_info(pid, &target_uid, &target_gid, &nspid)) {
fprintf(stderr, "Process %d not found\n", pid);
return 1;
}
if (nspid < 0) {
nspid = alt_lookup_nspid(pid);
}
// Get attach socket path of the target process (usually /tmp)
char* jattach_path = getenv("JATTACH_PATH");
if (jattach_path != NULL && strlen(jattach_path) < TMP_PATH) {
strcpy(tmp_path, jattach_path);
} else {
// Make sure our /tmp and target /tmp is the same
if (!get_tmp_path(pid)) {
strcpy(tmp_path, "/tmp");
}
if (!enter_mount_ns(pid)) {
printf("WARNING: couldn't enter target process mnt namespace\n");
}
}
// Dynamic attach is allowed only for the clients with the same euid/egid.
// If we are running under root, switch to the required euid/egid automatically.
if ((my_gid != target_gid && setegid(target_gid) != 0) ||
(my_uid != target_uid && seteuid(target_uid) != 0)) {
perror("Failed to change credentials to match the target process");
return 1;
}
// Make write() return EPIPE instead of silent process termination

View File

@@ -1,89 +0,0 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package one.profiler;
/**
 * Java API for in-process profiling. Serves as a wrapper around
 * async-profiler native library. This class is a singleton.
 * The first call to {@link #getInstance()} initiates loading of
 * libasyncProfiler.so.
 */
public class AsyncProfiler implements AsyncProfilerMXBean {
    /** The only instance; created lazily under the class lock. */
    private static AsyncProfiler instance;

    private AsyncProfiler() {
    }

    /**
     * Returns the singleton, loading the native library by its default
     * name on first use.
     */
    public static AsyncProfiler getInstance() {
        return getInstance(null);
    }

    /**
     * Returns the singleton. On the first call the native library is loaded,
     * either from {@code libPath} or, if it is {@code null}, by the name
     * "asyncProfiler". Later calls ignore the argument.
     *
     * @param libPath path passed to {@link System#load(String)}, or {@code null}
     */
    public static synchronized AsyncProfiler getInstance(String libPath) {
        if (instance == null) {
            if (libPath != null) {
                System.load(libPath);
            } else {
                System.loadLibrary("asyncProfiler");
            }
            instance = new AsyncProfiler();
        }
        return instance;
    }

    /** Delegates to the native {@code start0}. */
    @Override
    public void start(String event, long interval) {
        start0(event, interval);
    }

    /** Delegates to the native {@code stop0}. */
    @Override
    public void stop() {
        stop0();
    }

    @Override
    public native long getSamples();

    /** Delegates to the native {@code execute0} and returns its result. */
    @Override
    public String execute(String command) {
        return execute0(command);
    }

    /** Passes the ordinal of {@code counter} to the native {@code dumpCollapsed0}. */
    @Override
    public String dumpCollapsed(Counter counter) {
        return dumpCollapsed0(counter.ordinal());
    }

    /** Delegates to the native {@code dumpTraces0}. */
    @Override
    public String dumpTraces(int maxTraces) {
        return dumpTraces0(maxTraces);
    }

    /** Delegates to the native {@code dumpFlat0}. */
    @Override
    public String dumpFlat(int maxMethods) {
        return dumpFlat0(maxMethods);
    }

    // Entry points implemented by the native library
    private native void start0(String event, long interval);
    private native void stop0();
    private native String execute0(String command);
    private native String dumpCollapsed0(int counter);
    private native String dumpTraces0(int maxTraces);
    private native String dumpFlat0(int maxMethods);
}

143
src/javaApi.cpp Executable file → Normal file
View File

@@ -14,27 +14,28 @@
* limitations under the License.
*/
#include <fstream>
#include <sstream>
#include <errno.h>
#include <string.h>
#include "javaApi.h"
#include "arguments.h"
#include "os.h"
#include "profiler.h"
static void throw_new(JNIEnv* env, const char* exception_class, const char* message) {
jclass cls = env->FindClass(exception_class);
if (cls != NULL) {
env->ThrowNew(cls, message);
}
}
#include "vmStructs.h"
extern "C" JNIEXPORT void JNICALL
Java_one_profiler_AsyncProfiler_start0(JNIEnv* env, jobject unused, jstring event, jlong interval) {
Java_one_profiler_AsyncProfiler_start0(JNIEnv* env, jobject unused, jstring event, jlong interval, jboolean reset) {
Arguments args;
const char* event_str = env->GetStringUTFChars(event, NULL);
Error error = Profiler::_instance.start(event_str, interval, DEFAULT_FRAMEBUF, false);
args.addEvent(event_str);
args._interval = interval;
Error error = Profiler::_instance.start(args, reset);
env->ReleaseStringUTFChars(event, event_str);
if (error) {
throw_new(env, "java/lang/IllegalStateException", error.message());
JavaAPI::throwNew(env, "java/lang/IllegalStateException", error.message());
}
}
@@ -43,15 +44,10 @@ Java_one_profiler_AsyncProfiler_stop0(JNIEnv* env, jobject unused) {
Error error = Profiler::_instance.stop();
if (error) {
throw_new(env, "java/lang/IllegalStateException", error.message());
JavaAPI::throwNew(env, "java/lang/IllegalStateException", error.message());
}
}
extern "C" JNIEXPORT jlong JNICALL
Java_one_profiler_AsyncProfiler_getSamples(JNIEnv* env, jobject unused) {
return (jlong)Profiler::_instance.total_samples();
}
extern "C" JNIEXPORT jstring JNICALL
Java_one_profiler_AsyncProfiler_execute0(JNIEnv* env, jobject unused, jstring command) {
Arguments args;
@@ -60,37 +56,102 @@ Java_one_profiler_AsyncProfiler_execute0(JNIEnv* env, jobject unused, jstring co
env->ReleaseStringUTFChars(command, command_str);
if (error) {
throw_new(env, "java/lang/IllegalArgumentException", error.message());
JavaAPI::throwNew(env, "java/lang/IllegalArgumentException", error.message());
return NULL;
}
std::ostringstream out;
Profiler::_instance.runInternal(args, out);
return env->NewStringUTF(out.str().c_str());
if (args._file == NULL || args._output == OUTPUT_JFR) {
std::ostringstream out;
Profiler::_instance.runInternal(args, out);
return env->NewStringUTF(out.str().c_str());
} else {
std::ofstream out(args._file, std::ios::out | std::ios::trunc);
if (out.is_open()) {
Profiler::_instance.runInternal(args, out);
out.close();
return env->NewStringUTF("OK");
} else {
JavaAPI::throwNew(env, "java/io/IOException", strerror(errno));
return NULL;
}
}
}
extern "C" JNIEXPORT jstring JNICALL
Java_one_profiler_AsyncProfiler_dumpCollapsed0(JNIEnv* env, jobject unused, jint counter) {
Arguments args;
args._counter = counter == COUNTER_SAMPLES ? COUNTER_SAMPLES : COUNTER_TOTAL;
std::ostringstream out;
Profiler::_instance.dumpCollapsed(out, args);
return env->NewStringUTF(out.str().c_str());
extern "C" JNIEXPORT jlong JNICALL
Java_one_profiler_AsyncProfiler_getSamples(JNIEnv* env, jobject unused) {
return (jlong)Profiler::_instance.total_samples();
}
extern "C" JNIEXPORT jstring JNICALL
Java_one_profiler_AsyncProfiler_dumpTraces0(JNIEnv* env, jobject unused, jint max_traces) {
std::ostringstream out;
Profiler::_instance.dumpSummary(out);
Profiler::_instance.dumpTraces(out, max_traces ? max_traces : MAX_CALLTRACES);
return env->NewStringUTF(out.str().c_str());
extern "C" JNIEXPORT void JNICALL
Java_one_profiler_AsyncProfiler_filterThread0(JNIEnv* env, jobject unused, jthread thread, jboolean enable) {
int thread_id;
if (thread == NULL) {
thread_id = OS::threadId();
} else if (VMThread::hasNativeId()) {
VMThread* vmThread = VMThread::fromJavaThread(env, thread);
if (vmThread == NULL) {
return;
}
thread_id = vmThread->osThreadId();
} else {
return;
}
ThreadFilter* thread_filter = Profiler::_instance.threadFilter();
if (enable) {
thread_filter->add(thread_id);
} else {
thread_filter->remove(thread_id);
}
}
extern "C" JNIEXPORT jstring JNICALL
Java_one_profiler_AsyncProfiler_dumpFlat0(JNIEnv* env, jobject unused, jint max_methods) {
std::ostringstream out;
Profiler::_instance.dumpSummary(out);
Profiler::_instance.dumpFlat(out, max_methods ? max_methods : MAX_CALLTRACES);
return env->NewStringUTF(out.str().c_str());
#define F(name, sig) {(char*)#name, (char*)sig, (void*)Java_one_profiler_AsyncProfiler_##name}
static const JNINativeMethod profiler_natives[] = {
F(start0, "(Ljava/lang/String;JZ)V"),
F(stop0, "()V"),
F(execute0, "(Ljava/lang/String;)Ljava/lang/String;"),
F(getSamples, "()J"),
F(filterThread0, "(Ljava/lang/Thread;Z)V"),
};
#undef F
// Raises a Java exception of the given class (JNI internal name, e.g.
// "java/lang/IllegalStateException") with the given message.
// If the exception class cannot be found, no new exception is raised here.
void JavaAPI::throwNew(JNIEnv* env, const char* exception_class, const char* message) {
    jclass exception = env->FindClass(exception_class);
    if (exception == NULL) {
        return;
    }
    env->ThrowNew(exception, message);
}
// Since AsyncProfiler class can be renamed or moved to another package (shaded),
// we look for the actual class in the stack trace.
//
// Inspects up to 10 top frames of the current thread; the class that declares
// the method right below System.load() / System.loadLibrary() gets the native
// methods from profiler_natives registered, one by one. Any pending Java
// exception is cleared at the end.
void JavaAPI::registerNatives(jvmtiEnv* jvmti, JNIEnv* jni) {
    jvmtiFrameInfo frames[10];
    jint depth;
    if (jvmti->GetStackTrace(NULL, 0, sizeof(frames) / sizeof(frames[0]), frames, &depth) != 0) {
        return;
    }

    jclass System = jni->FindClass("java/lang/System");
    jmethodID load = jni->GetStaticMethodID(System, "load", "(Ljava/lang/String;)V");
    jmethodID loadLibrary = jni->GetStaticMethodID(System, "loadLibrary", "(Ljava/lang/String;)V");

    // Look for System.load() or System.loadLibrary() method in the stack trace.
    // The next frame will belong to AsyncProfiler class.
    for (int i = 0; i + 1 < depth; i++) {
        jmethodID method = frames[i].method;
        if (method != load && method != loadLibrary) {
            continue;
        }

        jclass profiler_class;
        if (jvmti->GetMethodDeclaringClass(frames[i + 1].method, &profiler_class) == 0) {
            for (int j = 0; j < sizeof(profiler_natives) / sizeof(JNINativeMethod); j++) {
                jni->RegisterNatives(profiler_class, &profiler_natives[j], 1);
            }
        }
        break;
    }

    jni->ExceptionClear();
}

29
src/javaApi.h Normal file
View File

@@ -0,0 +1,29 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _JAVAAPI_H
#define _JAVAAPI_H
#include <jvmti.h>
// Static helpers for the JNI side of the Java API
class JavaAPI {
public:
// Throws a Java exception of the given class (JNI internal name) with the given message
static void throwNew(JNIEnv* env, const char* exception_class, const char* message);
// Binds the native methods to the class that loaded the library, found via the stack trace
static void registerNatives(jvmtiEnv* jvmti, JNIEnv* jni);
};
#endif // _JAVAAPI_H

213
src/jfrMetadata.cpp Normal file
View File

@@ -0,0 +1,213 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "jfrMetadata.h"
// String-to-id lookup table; cleared at the end of the JfrMetadata constructor
std::map<std::string, int> Element::_string_map;
// Storage for Element's strings (NOTE(review): presumably indexed by the ids from _string_map - confirm in jfrMetadata.h)
std::vector<std::string> Element::_strings;
// The single metadata tree, built during static initialization
JfrMetadata JfrMetadata::_root;
// Builds the whole JFR metadata tree in one chained expression.
// The root element gets two children:
//  - "metadata", which lists every type (primitives, constant pool types,
//    event types and annotation types) together with its fields;
//  - "region", which carries the locale and GMT offset attributes.
JfrMetadata::JfrMetadata() : Element("root") {
*this
<< (element("metadata")
// Primitive types
<< type("boolean", T_BOOLEAN)
<< type("char", T_CHAR)
<< type("float", T_FLOAT)
<< type("double", T_DOUBLE)
<< type("byte", T_BYTE)
<< type("short", T_SHORT)
<< type("int", T_INT)
<< type("long", T_LONG)
// Non-primitive data types referenced by event fields
<< type("java.lang.String", T_STRING)
<< (type("java.lang.Class", T_CLASS, "Java Class")
<< field("classLoader", T_CLASS_LOADER, "Class Loader", F_CPOOL)
<< field("name", T_SYMBOL, "Name", F_CPOOL)
<< field("package", T_PACKAGE, "Package", F_CPOOL)
<< field("modifiers", T_INT, "Access Modifiers"))
<< (type("java.lang.Thread", T_THREAD, "Thread")
<< field("osName", T_STRING, "OS Thread Name")
<< field("osThreadId", T_LONG, "OS Thread Id")
<< field("javaName", T_STRING, "Java Thread Name")
<< field("javaThreadId", T_LONG, "Java Thread Id"))
<< (type("jdk.types.ClassLoader", T_CLASS_LOADER, "Java Class Loader")
<< field("type", T_CLASS, "Type", F_CPOOL)
<< field("name", T_SYMBOL, "Name", F_CPOOL))
<< (type("jdk.types.FrameType", T_FRAME_TYPE, "Frame type", true)
<< field("description", T_STRING, "Description"))
<< (type("jdk.types.ThreadState", T_THREAD_STATE, "Java Thread State", true)
<< field("name", T_STRING, "Name"))
<< (type("jdk.types.StackTrace", T_STACK_TRACE, "Stacktrace")
<< field("truncated", T_BOOLEAN, "Truncated")
<< field("frames", T_STACK_FRAME, "Stack Frames", F_ARRAY))
<< (type("jdk.types.StackFrame", T_STACK_FRAME)
<< field("method", T_METHOD, "Java Method", F_CPOOL)
<< field("lineNumber", T_INT, "Line Number")
<< field("bytecodeIndex", T_INT, "Bytecode Index")
<< field("type", T_FRAME_TYPE, "Frame Type", F_CPOOL))
<< (type("jdk.types.Method", T_METHOD, "Java Method")
<< field("type", T_CLASS, "Type", F_CPOOL)
<< field("name", T_SYMBOL, "Name", F_CPOOL)
<< field("descriptor", T_SYMBOL, "Descriptor", F_CPOOL)
<< field("modifiers", T_INT, "Access Modifiers")
<< field("hidden", T_BOOLEAN, "Hidden"))
<< (type("jdk.types.Package", T_PACKAGE, "Package")
<< field("name", T_SYMBOL, "Name", F_CPOOL))
<< (type("jdk.types.Symbol", T_SYMBOL, "Symbol", true)
<< field("string", T_STRING, "String"))
// Event types
<< (type("jdk.ExecutionSample", T_EXECUTION_SAMPLE, "Method Profiling Sample")
<< category("Java Virtual Machine", "Profiling")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("sampledThread", T_THREAD, "Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("state", T_THREAD_STATE, "Thread State", F_CPOOL))
<< (type("jdk.ObjectAllocationInNewTLAB", T_ALLOC_IN_NEW_TLAB, "Allocation in new TLAB")
<< category("Java Application")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("objectClass", T_CLASS, "Object Class", F_CPOOL)
<< field("allocationSize", T_LONG, "Allocation Size", F_BYTES)
<< field("tlabSize", T_LONG, "TLAB Size", F_BYTES))
<< (type("jdk.ObjectAllocationOutsideTLAB", T_ALLOC_OUTSIDE_TLAB, "Allocation outside TLAB")
<< category("Java Application")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("objectClass", T_CLASS, "Object Class", F_CPOOL)
<< field("allocationSize", T_LONG, "Allocation Size", F_BYTES))
<< (type("jdk.JavaMonitorEnter", T_MONITOR_ENTER, "Java Monitor Blocked")
<< category("Java Application")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("duration", T_LONG, "Duration", F_DURATION_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("monitorClass", T_CLASS, "Monitor Class", F_CPOOL)
<< field("address", T_LONG, "Monitor Address", F_ADDRESS))
<< (type("jdk.ThreadPark", T_THREAD_PARK, "Java Thread Park")
<< category("Java Application")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("duration", T_LONG, "Duration", F_DURATION_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("parkedClass", T_CLASS, "Class Parked On", F_CPOOL)
<< field("timeout", T_LONG, "Park Timeout", F_DURATION_NANOS)
<< field("address", T_LONG, "Address of Object Parked", F_ADDRESS))
<< (type("jdk.CPULoad", T_CPU_LOAD, "CPU Load")
<< category("Operating System", "Processor")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("jvmUser", T_FLOAT, "JVM User", F_PERCENTAGE)
<< field("jvmSystem", T_FLOAT, "JVM System", F_PERCENTAGE)
<< field("machineTotal", T_FLOAT, "Machine Total", F_PERCENTAGE))
<< (type("jdk.ActiveRecording", T_ACTIVE_RECORDING, "Flight Recording")
<< category("Flight Recorder")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("duration", T_LONG, "Duration", F_DURATION_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("id", T_LONG, "Id")
<< field("name", T_STRING, "Name")
<< field("destination", T_STRING, "Destination")
<< field("maxAge", T_LONG, "Max Age", F_DURATION_MILLIS)
<< field("maxSize", T_LONG, "Max Size", F_BYTES)
<< field("recordingStart", T_LONG, "Start Time", F_TIME_MILLIS)
<< field("recordingDuration", T_LONG, "Recording Duration", F_DURATION_MILLIS))
<< (type("jdk.ActiveSetting", T_ACTIVE_SETTING, "Recording Setting")
<< category("Flight Recorder")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("duration", T_LONG, "Duration", F_DURATION_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("id", T_LONG, "Event Id")
<< field("name", T_STRING, "Setting Name")
<< field("value", T_STRING, "Setting Value"))
<< (type("jdk.OSInformation", T_OS_INFORMATION, "OS Information")
<< category("Operating System")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("osVersion", T_STRING, "OS Version"))
<< (type("jdk.CPUInformation", T_CPU_INFORMATION, "CPU Information")
<< category("Operating System", "Processor")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("cpu", T_STRING, "Type")
<< field("description", T_STRING, "Description")
<< field("sockets", T_INT, "Sockets", F_UNSIGNED)
<< field("cores", T_INT, "Cores", F_UNSIGNED)
<< field("hwThreads", T_INT, "Hardware Threads", F_UNSIGNED))
<< (type("jdk.JVMInformation", T_JVM_INFORMATION, "JVM Information")
<< category("Java Virtual Machine")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("jvmName", T_STRING, "JVM Name")
<< field("jvmVersion", T_STRING, "JVM Version")
<< field("jvmArguments", T_STRING, "JVM Command Line Arguments")
<< field("jvmFlags", T_STRING, "JVM Settings File Arguments")
<< field("javaArguments", T_STRING, "Java Application Arguments")
<< field("jvmStartTime", T_LONG, "JVM Start Time", F_TIME_MILLIS)
<< field("pid", T_LONG, "Process Identifier"))
<< (type("jdk.InitialSystemProperty", T_INITIAL_SYSTEM_PROPERTY, "Initial System Property")
<< category("Java Virtual Machine")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("key", T_STRING, "Key")
<< field("value", T_STRING, "Value"))
// Annotation types
<< (type("jdk.jfr.Label", T_LABEL, NULL)
<< field("value", T_STRING))
<< (type("jdk.jfr.Category", T_CATEGORY, NULL)
<< field("value", T_STRING, NULL, F_ARRAY))
<< (type("jdk.jfr.Timestamp", T_TIMESTAMP, "Timestamp")
<< field("value", T_STRING))
<< (type("jdk.jfr.Timespan", T_TIMESPAN, "Timespan")
<< field("value", T_STRING))
<< (type("jdk.jfr.DataAmount", T_DATA_AMOUNT, "Data Amount")
<< field("value", T_STRING))
<< type("jdk.jfr.MemoryAddress", T_MEMORY_ADDRESS, "Memory Address")
<< type("jdk.jfr.Unsigned", T_UNSIGNED, "Unsigned Value")
<< type("jdk.jfr.Percentage", T_PERCENTAGE, "Percentage"))
// Second child of the root
<< element("region").attribute("locale", "en_US").attribute("gmtOffset", "0");
// The map is used only during construction
_string_map.clear();
}

231
src/jfrMetadata.h Normal file
View File

@@ -0,0 +1,231 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _JFRMETADATA_H
#define _JFRMETADATA_H
#include <string>
#include <map>
#include <vector>
#include <stdio.h>
#include <string.h>
// Numeric ids of the types described in the JFR metadata.
// Element::attribute(const char*, JfrType) writes these ids as decimal strings,
// and JfrMetadata::type() compares an id against T_EVENT / T_ANNOTATION
// to choose the "superType" attribute.
enum JfrType {
// Special ids
T_METADATA = 0,
T_CPOOL = 1,
// Ids named after primitive types
T_BOOLEAN = 4,
T_CHAR = 5,
T_FLOAT = 6,
T_DOUBLE = 7,
T_BYTE = 8,
T_SHORT = 9,
T_INT = 10,
T_LONG = 11,
// Ids named after string, class, thread and stack trace related types
T_STRING = 20,
T_CLASS = 21,
T_THREAD = 22,
T_CLASS_LOADER = 23,
T_FRAME_TYPE = 24,
T_THREAD_STATE = 25,
T_STACK_TRACE = 26,
T_STACK_FRAME = 27,
T_METHOD = 28,
T_PACKAGE = 29,
T_SYMBOL = 30,
// Ids above T_EVENT (up to T_ANNOTATION) get superType "jdk.jfr.Event" in type()
T_EVENT = 100,
T_EXECUTION_SAMPLE = 101,
T_ALLOC_IN_NEW_TLAB = 102,
T_ALLOC_OUTSIDE_TLAB = 103,
T_MONITOR_ENTER = 104,
T_THREAD_PARK = 105,
T_CPU_LOAD = 106,
T_ACTIVE_RECORDING = 107,
T_ACTIVE_SETTING = 108,
T_OS_INFORMATION = 109,
T_CPU_INFORMATION = 110,
T_JVM_INFORMATION = 111,
T_INITIAL_SYSTEM_PROPERTY = 112,
// Ids above T_ANNOTATION get superType "java.lang.annotation.Annotation" in type()
T_ANNOTATION = 200,
T_LABEL = 201,
T_CATEGORY = 202,
T_TIMESTAMP = 203,
T_TIMESPAN = 204,
T_DATA_AMOUNT = 205,
T_MEMORY_ADDRESS = 206,
T_UNSIGNED = 207,
T_PERCENTAGE = 208,
};
// A key/value pair attached to a metadata Element.
// Element::attribute() stores string-pool ids (see Element::getId) in both fields.
class Attribute {
  public:
    int _key;
    int _value;

    Attribute(int key, int value) {
        _key = key;
        _value = value;
    }
};
// A node of the metadata tree: a name, a list of attributes and a list of children.
// All strings are interned in a shared pool and referenced by zero-based id.
class Element {
  protected:
    // Maps a string to its pool id plus one (0 means "not interned yet")
    static std::map<std::string, int> _string_map;
    // Interned strings in id order
    static std::vector<std::string> _strings;

    // Returns the zero-based pool id of s, interning it on first use
    static int getId(const char* s) {
        std::string text(s);
        // operator[] inserts a zero entry for an unknown string,
        // so size() already counts the new entry below
        int& slot = _string_map[text];
        if (slot == 0) {
            slot = _string_map.size();
            _strings.push_back(text);
        }
        return slot - 1;
    }

  public:
    const int _name;
    std::vector<Attribute> _attributes;
    std::vector<const Element*> _children;

    Element(const char* name) : _name(getId(name)) {
    }

    // Appends a string attribute; returns *this for chaining
    Element& attribute(const char* key, const char* value) {
        _attributes.push_back(Attribute(getId(key), getId(value)));
        return *this;
    }

    // Appends an attribute whose value is the decimal representation of a type id
    Element& attribute(const char* key, JfrType value) {
        char decimal[16];
        sprintf(decimal, "%d", value);
        return attribute(key, decimal);
    }

    // Appends a child; only the address is stored, so the child must outlive this element
    Element& operator<<(const Element& child) {
        const Element* child_ptr = &child;
        _children.push_back(child_ptr);
        return *this;
    }
};
// Root of the metadata tree plus the helper functions used to build it.
// The helpers intern strings through Element::getId, so the order of calls
// determines the string ids.
class JfrMetadata : Element {
private:
static JfrMetadata _root;
// Bit flags accepted by field(). F_CPOOL and F_ARRAY are handled independently;
// of the remaining flags only the first one set (in the order tested in field())
// contributes annotations.
enum FieldFlags {
F_CPOOL = 0x1,
F_ARRAY = 0x2,
F_UNSIGNED = 0x4,
F_BYTES = 0x8,
F_TIME_TICKS = 0x10,
F_TIME_MILLIS = 0x20,
F_DURATION_TICKS = 0x40,
F_DURATION_NANOS = 0x80,
F_DURATION_MILLIS = 0x100,
F_ADDRESS = 0x200,
F_PERCENTAGE = 0x400,
};
// Creates a new heap-allocated element; nothing visible here deletes it.
static Element& element(const char* name) {
return *new Element(name);
}
// Builds a "class" element with "name" and "id" attributes.
// simple == true adds simpleType="true"; otherwise ids above T_ANNOTATION get the
// Annotation superType and ids above T_EVENT get the jdk.jfr.Event superType.
// A non-NULL label is attached as a T_LABEL annotation.
static Element& type(const char* name, JfrType id, const char* label = NULL, bool simple = false) {
Element& e = element("class");
e.attribute("name", name);
e.attribute("id", id);
if (simple) {
e.attribute("simpleType", "true");
} else if (id > T_ANNOTATION) {
e.attribute("superType", "java.lang.annotation.Annotation");
} else if (id > T_EVENT) {
e.attribute("superType", "jdk.jfr.Event");
}
if (label != NULL) {
e << annotation(T_LABEL, label);
}
return e;
}
// Builds a "field" element with "name" and "class" attributes,
// then adds attributes and annotations according to flags (see FieldFlags).
static Element& field(const char* name, JfrType type, const char* label = NULL, int flags = 0) {
Element& e = element("field");
e.attribute("name", name);
e.attribute("class", type);
if (flags & F_CPOOL) {
e.attribute("constantPool", "true");
}
if (flags & F_ARRAY) {
e.attribute("dimension", "1");
}
if (label != NULL) {
e << annotation(T_LABEL, label);
}
// Mutually exclusive chain: only the first matching flag is applied
if (flags & F_UNSIGNED) {
e << annotation(T_UNSIGNED);
} else if (flags & F_BYTES) {
e << annotation(T_UNSIGNED) << annotation(T_DATA_AMOUNT, "BYTES");
} else if (flags & F_TIME_TICKS) {
e << annotation(T_TIMESTAMP, "TICKS");
} else if (flags & F_TIME_MILLIS) {
e << annotation(T_TIMESTAMP, "MILLISECONDS_SINCE_EPOCH");
} else if (flags & F_DURATION_TICKS) {
e << annotation(T_TIMESPAN, "TICKS");
} else if (flags & F_DURATION_NANOS) {
e << annotation(T_TIMESPAN, "NANOSECONDS");
} else if (flags & F_DURATION_MILLIS) {
e << annotation(T_TIMESPAN, "MILLISECONDS");
} else if (flags & F_ADDRESS) {
e << annotation(T_UNSIGNED) << annotation(T_MEMORY_ADDRESS);
} else if (flags & F_PERCENTAGE) {
e << annotation(T_PERCENTAGE);
}
return e;
}
// Builds an "annotation" element of the given type with an optional "value" attribute.
static Element& annotation(JfrType type, const char* value = NULL) {
Element& e = element("annotation");
e.attribute("class", type);
if (value != NULL) {
e.attribute("value", value);
}
return e;
}
// Builds a T_CATEGORY annotation with one or two values ("value-0", "value-1").
static Element& category(const char* value0, const char* value1 = NULL) {
Element& e = annotation(T_CATEGORY);
e.attribute("value-0", value0);
if (value1 != NULL) {
e.attribute("value-1", value1);
}
return e;
}
public:
JfrMetadata();
// Returns the static root element of the metadata tree.
static Element* root() {
return &_root;
}
// Returns the pool of interned strings, indexed by string id.
static std::vector<std::string>& strings() {
return _strings;
}
};
#endif // _JFRMETADATA_H

71
src/jstack.cpp Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright 2021 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <signal.h>
#include <time.h>
#include "jstack.h"
#include "profiler.h"
// Wait at most this number of milliseconds to finish processing of pending signals.
// JStack::start() sleeps in 1 ms steps and gives up after this many steps.
const int MAX_WAIT_MILLIS = 2000;
void JStack::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
ExecutionEvent event;
event._thread_state = Profiler::_instance.getThreadState(ucontext);
Profiler::_instance.recordSample(ucontext, 1, 0, &event);
}
// Takes a one-shot thread dump: signals every accepted thread with SIGVTALRM,
// records the calling thread's own stack directly, then waits (bounded by
// MAX_WAIT_MILLIS one-millisecond sleeps) until the expected number of samples
// has been recorded. Always returns Error::OK.
Error JStack::start(Arguments& args) {
    OS::installSignalHandler(SIGVTALRM, signalHandler);

    int self = OS::threadId();
    // Target value of the profiler's sample counter once every dump has arrived
    u64 required_samples = Profiler::_instance.total_samples();

    ThreadFilter* thread_filter = Profiler::_instance.threadFilter();
    bool thread_filter_enabled = thread_filter->enabled();

    ThreadList* thread_list = OS::listThreads();
    for (int tid = thread_list->next(); tid != -1; tid = thread_list->next()) {
        if (tid == self) {
            continue;
        }
        if (thread_filter_enabled && !thread_filter->accept(tid)) {
            continue;
        }
        // Count only the threads the signal was actually delivered to
        if (OS::sendSignalToThread(tid, SIGVTALRM)) {
            required_samples++;
        }
    }
    delete thread_list;

    // Get our own stack trace after all other threads
    if (!thread_filter_enabled || thread_filter->accept(self)) {
        ExecutionEvent own_event;
        own_event._thread_state = THREAD_RUNNING;
        Profiler::_instance.recordSample(NULL, 1, 0, &own_event);
        required_samples++;
    }

    // Wait until all asynchronous stack traces collected
    int waited_millis = 0;
    while (Profiler::_instance.total_samples() < required_samples && waited_millis < MAX_WAIT_MILLIS) {
        struct timespec one_milli = {0, 1000000};
        nanosleep(&one_milli, NULL);
        waited_millis++;
    }

    return Error::OK;
}
// The dump is fully taken inside start(), so stopping is a no-op.
void JStack::stop() {
// Nothing to do
}

41
src/jstack.h Normal file
View File

@@ -0,0 +1,41 @@
/*
* Copyright 2021 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _JSTACK_H
#define _JSTACK_H
#include <signal.h>
#include "engine.h"
// Engine that produces an asynchronous thread dump (see JStack::start in jstack.cpp).
class JStack : public Engine {
private:
// Signal handler installed for SIGVTALRM by start()
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
public:
// Engine name; EVENT_JSTACK is defined outside this header
const char* name() {
return EVENT_JSTACK;
}
// Unit of the recorded counter
const char* units() {
return "samples";
}
Error start(Arguments& args);
void stop();
};
#endif // _JSTACK_H

106
src/linearAllocator.cpp Normal file
View File

@@ -0,0 +1,106 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "linearAllocator.h"
#include "os.h"
// Creates an allocator that hands out memory from chunks of chunk_size bytes.
// The first chunk is allocated immediately and serves as both tail and reserve.
LinearAllocator::LinearAllocator(size_t chunk_size) {
    // _chunk_size must be set first: allocateChunk() reads it
    _chunk_size = chunk_size;

    Chunk* first = allocateChunk(NULL);
    _tail = first;
    _reserve = first;
}
// Releases every chunk: clear() frees all but the oldest one, which is freed here.
LinearAllocator::~LinearAllocator() {
clear();
freeChunk(_tail);
}
void LinearAllocator::clear() {
if (_reserve->prev == _tail) {
freeChunk(_reserve);
}
while (_tail->prev != NULL) {
Chunk* current = _tail;
_tail = _tail->prev;
freeChunk(current);
}
_reserve = _tail;
_tail->offs = sizeof(Chunk);
}
// Allocates size bytes from the current chunk by bumping its offset with CAS,
// moving on to the next chunk when the current one is full.
// Returns NULL if the request can never fit into a chunk or if a new chunk
// cannot be allocated.
void* LinearAllocator::alloc(size_t size) {
    // A request larger than the usable part of a chunk can never succeed.
    // Without this check every iteration below would allocate yet another chunk
    // until memory is exhausted.
    if (_chunk_size < sizeof(Chunk) || size > _chunk_size - sizeof(Chunk)) {
        return NULL;
    }

    Chunk* chunk = _tail;
    do {
        // Fast path: bump a pointer with CAS
        for (size_t offs = chunk->offs; offs + size <= _chunk_size; offs = chunk->offs) {
            if (__sync_bool_compare_and_swap(&chunk->offs, offs, offs + size)) {
                // Unsigned arithmetic: when offs is already past the middle, the subtraction
                // wraps to a huge value and the condition is false, so the reserve is
                // requested only by the allocation that crosses the middle
                if (_chunk_size / 2 - offs < size) {
                    // Stepped over a middle of the chunk - it's time to prepare a new one
                    reserveChunk(chunk);
                }
                return (char*)chunk + offs;
            }
        }
    } while ((chunk = getNextChunk(chunk)) != NULL);

    return NULL;
}
// Obtains a new chunk of _chunk_size bytes linked back to current.
// Returns NULL if the underlying allocation fails.
Chunk* LinearAllocator::allocateChunk(Chunk* current) {
    Chunk* fresh = (Chunk*)OS::safeAlloc(_chunk_size);
    if (fresh == NULL) {
        return NULL;
    }

    fresh->prev = current;
    // Allocations start right after the chunk header
    fresh->offs = sizeof(Chunk);
    return fresh;
}
// Returns a chunk of _chunk_size bytes to the OS.
void LinearAllocator::freeChunk(Chunk* current) {
OS::safeFree(current, _chunk_size);
}
// Allocates the chunk that will follow current and publishes it in _reserve.
// The CAS succeeds only while _reserve still equals current; if another thread
// has already published a reserve, the freshly allocated chunk is released.
void LinearAllocator::reserveChunk(Chunk* current) {
Chunk* reserve = allocateChunk(current);
if (reserve != NULL && !__sync_bool_compare_and_swap(&_reserve, current, reserve)) {
// Unlikely case that we are too late
freeChunk(reserve);
}
}
// Advances past a full chunk. Returns the chunk to retry the allocation in,
// or NULL if a new chunk is required but cannot be allocated.
Chunk* LinearAllocator::getNextChunk(Chunk* current) {
Chunk* reserve = _reserve;
if (reserve == current) {
// Unlikely case: no reserve yet.
// It's probably being allocated right now, so let's compete
reserve = allocateChunk(current);
if (reserve == NULL) {
// Not enough memory
return NULL;
}
// Publish our chunk unless another thread has published one first;
// in that case drop ours and use the winner's chunk
Chunk* prev_reserve = __sync_val_compare_and_swap(&_reserve, current, reserve);
if (prev_reserve != current) {
freeChunk(reserve);
reserve = prev_reserve;
}
}
// Expected case: a new chunk is already reserved
// Move _tail from current to the reserve; if _tail has already been moved
// by another thread, return whatever _tail currently is
Chunk* tail = __sync_val_compare_and_swap(&_tail, current, reserve);
return tail == current ? reserve : tail;
}

50
src/linearAllocator.h Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _LINEARALLOCATOR_H
#define _LINEARALLOCATOR_H
#include <stddef.h>
// Header placed at the start of every LinearAllocator chunk.
// LinearAllocator uses sizeof(Chunk) as the initial allocation offset,
// so allocated data begins right after this header.
struct Chunk {
// Previously allocated chunk, or NULL for the first one
Chunk* prev;
// Offset of the next free byte, counted from the start of the chunk; updated with CAS
volatile size_t offs;
// To avoid false sharing
char _padding[56];
};
// Bump-pointer allocator backed by a chain of fixed-size chunks.
// Individual allocations are never freed; clear() releases everything at once.
class LinearAllocator {
private:
// Size in bytes of every chunk, including its Chunk header
size_t _chunk_size;
// Chunk that allocations are currently served from
Chunk* _tail;
// Chunk prepared to become the next tail; equals _tail when none is prepared
Chunk* _reserve;
Chunk* allocateChunk(Chunk* current);
void freeChunk(Chunk* current);
void reserveChunk(Chunk* current);
Chunk* getNextChunk(Chunk* current);
public:
LinearAllocator(size_t chunk_size);
~LinearAllocator();
// Frees all chunks but the first and resets the allocator to empty
void clear();
// Returns size bytes of memory, or NULL on failure
void* alloc(size_t size);
};
#endif // _LINEARALLOCATOR_H

122
src/lockTracer.cpp Executable file → Normal file
View File

@@ -16,6 +16,7 @@
#include <string.h>
#include "lockTracer.h"
#include "os.h"
#include "profiler.h"
#include "vmStructs.h"
@@ -23,23 +24,13 @@
jlong LockTracer::_start_time = 0;
jclass LockTracer::_LockSupport = NULL;
jmethodID LockTracer::_getBlocker = NULL;
UnsafeParkFunc LockTracer::_original_Unsafe_Park = NULL;
Error LockTracer::start(const char* event, long interval) {
NativeCodeCache* libjvm = Profiler::_instance.jvmLibrary();
if (libjvm == NULL) {
return Error("libjvm not found among loaded libraries");
}
if (!VMStructs::init(libjvm)) {
return Error("VMStructs unavailable. Unsupported JVM?");
}
Error LockTracer::start(Arguments& args) {
// Enable Java Monitor events
jvmtiEnv* jvmti = VM::jvmti();
jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTER, NULL);
jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTERED, NULL);
jvmti->GetTime(&_start_time);
_start_time = OS::nanotime();
if (_getBlocker == NULL) {
JNIEnv* env = VM::jni();
@@ -47,12 +38,8 @@ Error LockTracer::start(const char* event, long interval) {
_getBlocker = env->GetStaticMethodID(_LockSupport, "getBlocker", "(Ljava/lang/Thread;)Ljava/lang/Object;");
}
if (_original_Unsafe_Park == NULL) {
_original_Unsafe_Park = (UnsafeParkFunc)libjvm->findSymbol("Unsafe_Park");
}
// Intercept Unsafe.park() for tracing contended ReentrantLocks
if (_original_Unsafe_Park != NULL) {
if (VMStructs::_unsafe_park != NULL) {
bindUnsafePark(UnsafeParkTrap);
}
@@ -66,83 +53,94 @@ void LockTracer::stop() {
jvmti->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTERED, NULL);
// Reset Unsafe.park() trap
if (_original_Unsafe_Park != NULL) {
bindUnsafePark(_original_Unsafe_Park);
if (VMStructs::_unsafe_park != NULL) {
bindUnsafePark(VMStructs::_unsafe_park);
}
}
void JNICALL LockTracer::MonitorContendedEnter(jvmtiEnv* jvmti, JNIEnv* env, jthread thread, jobject object) {
jlong enter_time;
jvmti->GetTime(&enter_time);
jlong enter_time = OS::nanotime();
jvmti->SetTag(thread, enter_time);
}
void JNICALL LockTracer::MonitorContendedEntered(jvmtiEnv* jvmti, JNIEnv* env, jthread thread, jobject object) {
jlong enter_time, entered_time;
jvmti->GetTime(&entered_time);
jlong entered_time = OS::nanotime();
jlong enter_time;
jvmti->GetTag(thread, &enter_time);
// Time is meaningless if lock attempt has started before profiling
if (enter_time >= _start_time) {
recordContendedLock(env->GetObjectClass(object), entered_time - enter_time);
if (_enabled && enter_time >= _start_time) {
char* lock_name = getLockName(jvmti, env, object);
recordContendedLock(BCI_LOCK, enter_time, entered_time, lock_name, object, 0);
jvmti->Deallocate((unsigned char*)lock_name);
}
}
void JNICALL LockTracer::UnsafeParkTrap(JNIEnv* env, jobject instance, jboolean isAbsolute, jlong time) {
jvmtiEnv* jvmti = VM::jvmti();
jclass lock_class = getParkBlockerClass(jvmti, env);
jobject park_blocker = _enabled ? getParkBlocker(jvmti, env) : NULL;
jlong park_start_time, park_end_time;
if (lock_class != NULL) {
jvmti->GetTime(&park_start_time);
if (park_blocker != NULL) {
park_start_time = OS::nanotime();
}
_original_Unsafe_Park(env, instance, isAbsolute, time);
if (lock_class != NULL) {
jvmti->GetTime(&park_end_time);
recordContendedLock(lock_class, park_end_time - park_start_time);
VMStructs::_unsafe_park(env, instance, isAbsolute, time);
if (park_blocker != NULL) {
park_end_time = OS::nanotime();
char* lock_name = getLockName(jvmti, env, park_blocker);
if (lock_name == NULL || isConcurrentLock(lock_name)) {
recordContendedLock(BCI_PARK, park_start_time, park_end_time, lock_name, park_blocker, time);
}
jvmti->Deallocate((unsigned char*)lock_name);
}
}
jclass LockTracer::getParkBlockerClass(jvmtiEnv* jvmti, JNIEnv* env) {
jobject LockTracer::getParkBlocker(jvmtiEnv* jvmti, JNIEnv* env) {
jthread thread;
if (jvmti->GetCurrentThread(&thread) != 0) {
return NULL;
}
// Call LockSupport.getBlocker(Thread.currentThread())
jobject park_blocker = env->CallStaticObjectMethod(_LockSupport, _getBlocker, thread);
if (park_blocker == NULL) {
return NULL;
}
jclass lock_class = env->GetObjectClass(park_blocker);
char* class_name;
if (jvmti->GetClassSignature(lock_class, &class_name, NULL) != 0) {
return NULL;
}
// Do not count synchronizers other than ReentrantLock, ReentrantReadWriteLock and Semaphore
if (strncmp(class_name, "Ljava/util/concurrent/locks/ReentrantLock", 41) != 0 &&
strncmp(class_name, "Ljava/util/concurrent/locks/ReentrantReadWriteLock", 50) != 0 &&
strncmp(class_name, "Ljava/util/concurrent/Semaphore", 31) != 0) {
lock_class = NULL;
}
jvmti->Deallocate((unsigned char*)class_name);
return lock_class;
return env->CallStaticObjectMethod(_LockSupport, _getBlocker, thread);
}
void LockTracer::recordContendedLock(jclass lock_class, jlong time) {
if (VMStructs::hasPermGen()) {
// PermGen in JDK 7 makes difficult to get symbol name from jclass.
// Let's just skip it and record stack trace without lock class.
Profiler::_instance.recordSample(NULL, time, 0, NULL);
} else {
VMSymbol* lock_name = (*(java_lang_Class**)lock_class)->klass()->name();
Profiler::_instance.recordSample(NULL, time, BCI_SYMBOL, (jmethodID)lock_name);
char* LockTracer::getLockName(jvmtiEnv* jvmti, JNIEnv* env, jobject lock) {
char* class_name;
if (jvmti->GetClassSignature(env->GetObjectClass(lock), &class_name, NULL) != 0) {
return NULL;
}
return class_name;
}
bool LockTracer::isConcurrentLock(const char* lock_name) {
// Do not count synchronizers other than ReentrantLock, ReentrantReadWriteLock and Semaphore
return strncmp(lock_name, "Ljava/util/concurrent/locks/ReentrantLock", 41) == 0 ||
strncmp(lock_name, "Ljava/util/concurrent/locks/ReentrantReadWriteLock", 50) == 0 ||
strncmp(lock_name, "Ljava/util/concurrent/Semaphore", 31) == 0;
}
void LockTracer::recordContendedLock(int event_type, u64 start_time, u64 end_time,
const char* lock_name, jobject lock, jlong timeout) {
LockEvent event;
event._class_id = 0;
event._start_time = start_time;
event._end_time = end_time;
event._address = *(uintptr_t*)lock;
event._timeout = timeout;
if (lock_name != NULL) {
if (lock_name[0] == 'L') {
event._class_id = Profiler::_instance.classMap()->lookup(lock_name + 1, strlen(lock_name) - 2);
} else {
event._class_id = Profiler::_instance.classMap()->lookup(lock_name);
}
}
Profiler::_instance.recordSample(NULL, end_time - start_time, event_type, &event);
}
void LockTracer::bindUnsafePark(UnsafeParkFunc entry) {

19
src/lockTracer.h Executable file → Normal file
View File

@@ -18,6 +18,7 @@
#define _LOCKTRACER_H
#include <jvmti.h>
#include "arch.h"
#include "engine.h"
@@ -28,10 +29,12 @@ class LockTracer : public Engine {
static jlong _start_time;
static jclass _LockSupport;
static jmethodID _getBlocker;
static UnsafeParkFunc _original_Unsafe_Park;
static jclass getParkBlockerClass(jvmtiEnv* jvmti, JNIEnv* env);
static void recordContendedLock(jclass lock_class, jlong time);
static jobject getParkBlocker(jvmtiEnv* jvmti, JNIEnv* env);
static char* getLockName(jvmtiEnv* jvmti, JNIEnv* env, jobject lock);
static bool isConcurrentLock(const char* lock_name);
static void recordContendedLock(int event_type, u64 start_time, u64 end_time,
const char* lock_name, jobject lock, jlong timeout);
static void bindUnsafePark(UnsafeParkFunc entry);
public:
@@ -39,7 +42,15 @@ class LockTracer : public Engine {
return "lock";
}
Error start(const char* event, long interval);
const char* units() {
return "ns";
}
CStack cstack() {
return CSTACK_NO;
}
Error start(Arguments& args);
void stop();
static void JNICALL MonitorContendedEnter(jvmtiEnv* jvmti, JNIEnv* env, jthread thread, jobject object);

33
src/mutex.cpp Normal file
View File

@@ -0,0 +1,33 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mutex.h"
// Initializes the underlying pthread mutex as a recursive mutex,
// so the owning thread may lock it repeatedly.
Mutex::Mutex() {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&_mutex, &attr);
    // The attribute object is only needed while the mutex is being initialized;
    // destroy it so that an initialized attribute is never left behind
    pthread_mutexattr_destroy(&attr);
}
// Acquires the mutex; the result of pthread_mutex_lock is not checked.
void Mutex::lock() {
pthread_mutex_lock(&_mutex);
}
// Releases the mutex; the result of pthread_mutex_unlock is not checked.
void Mutex::unlock() {
pthread_mutex_unlock(&_mutex);
}

49
src/mutex.h Normal file
View File

@@ -0,0 +1,49 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _MUTEX_H
#define _MUTEX_H
#include <pthread.h>
// Thin wrapper around pthread_mutex_t.
// The constructor (mutex.cpp) configures it as PTHREAD_MUTEX_RECURSIVE.
class Mutex {
private:
pthread_mutex_t _mutex;
public:
Mutex();
void lock();
void unlock();
};
// Scoped lock guard: locks the mutex on construction and unlocks it on destruction.
class MutexLocker {
  private:
    Mutex* _mutex;

    // Copying is disabled (declared but intentionally not defined):
    // a copy would unlock the same mutex a second time in its destructor
    MutexLocker(const MutexLocker&);
    MutexLocker& operator=(const MutexLocker&);

  public:
    MutexLocker(Mutex& mutex) : _mutex(&mutex) {
        _mutex->lock();
    }

    ~MutexLocker() {
        _mutex->unlock();
    }
};
#endif // _MUTEX_H

82
src/os.h Normal file
View File

@@ -0,0 +1,82 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _OS_H
#define _OS_H
#include <signal.h>
#include <stddef.h>
#include "arch.h"
// Coarse scheduling state of a thread as reported by OS::threadState().
enum ThreadState {
// The state could not be determined
THREAD_INVALID,
THREAD_RUNNING,
THREAD_SLEEPING
};
// Opaque timer handle returned by OS::startTimer() and consumed by OS::stopTimer().
// It has no members; platform code casts its own timer id to Timer*.
class Timer {
};
// Iterator over the threads of the current process; obtained from OS::listThreads().
class ThreadList {
public:
virtual ~ThreadList() {}
// Restarts the iteration from the beginning
virtual void rewind() = 0;
// Returns the next thread id, or -1 when there are no more threads
virtual int next() = 0;
// Returns the number of threads
virtual int size() = 0;
};
// Platform abstraction layer; implemented per OS in os_linux.cpp / os_macos.cpp.
class OS {
private:
typedef void (*SigAction)(int, siginfo_t*, void*);
typedef void (*SigHandler)(int);
typedef void (*TimerCallback)(void*);
public:
// Monotonic time in nanoseconds
static u64 nanotime();
// Wall-clock time in milliseconds (based on gettimeofday)
static u64 millis();
// Start time of the current process in milliseconds; 0 if it cannot be determined
static u64 processStartTime();
// 64-bit host <-> network (big-endian) byte order conversion
static u64 hton64(u64 x);
static u64 ntoh64(u64 x);
// Upper bound for thread ids (read from /proc/sys/kernel/pid_max on Linux)
static int getMaxThreadId();
static int processId();
// Id of the calling thread
static int threadId();
// Copies the thread name into name_buf; returns false if no name is available
static bool threadName(int thread_id, char* name_buf, size_t name_len);
static ThreadState threadState(int thread_id);
// Returns a newly allocated ThreadList; the caller deletes it
static ThreadList* listThreads();
// Constant per platform: false on Linux, true on macOS
static bool isJavaLibraryVisible();
// Installs `handler` as a plain handler when it is non-NULL;
// otherwise installs `action` with SA_SIGINFO | SA_RESTART
static void installSignalHandler(int signo, SigAction action, SigHandler handler = NULL);
// Returns true if the signal has been sent
static bool sendSignalToThread(int thread_id, int signo);
// Memory allocation without going through malloc (raw mmap/munmap syscalls on Linux);
// safeAlloc returns NULL on failure
static void* safeAlloc(size_t size);
static void safeFree(void* addr, size_t size);
// Starts a periodic timer calling callback(arg); interval is in nanoseconds.
// Returns NULL if the timer cannot be created
static Timer* startTimer(u64 interval, TimerCallback callback, void* arg);
static void stopTimer(Timer* timer);
// Fills buf with a textual CPU description; returns false on failure
static bool getCpuDescription(char* buf, size_t size);
// Store user/system CPU time in *utime / *stime and return the corresponding
// total ("real") counter. On Linux getTotalCpuTime returns (u64)-1 on failure
static u64 getProcessCpuTime(u64* utime, u64* stime);
static u64 getTotalCpuTime(u64* utime, u64* stime);
};
#endif // _OS_H

319
src/os_linux.cpp Normal file
View File

@@ -0,0 +1,319 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef __linux__
#include <arpa/inet.h>
#include <byteswap.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include "os.h"
#ifdef __LP64__
# define MMAP_SYSCALL __NR_mmap
#else
# define MMAP_SYSCALL __NR_mmap2
#endif
// ThreadList implementation that enumerates /proc/self/task.
class LinuxThreadList : public ThreadList {
  private:
    DIR* _dir;
    // Cached number of threads; -1 means "not read yet"
    int _thread_count;

    // Reads the number of threads of the current process from /proc/self/stat.
    // Returns 0 if the file cannot be read or parsed.
    int getThreadCount() {
        char buf[512];
        int fd = open("/proc/self/stat", O_RDONLY);
        if (fd == -1) {
            return 0;
        }

        int thread_count = 0;
        // Leave room for the terminator: the string functions below need it
        ssize_t len = read(fd, buf, sizeof(buf) - 1);
        if (len > 0) {
            buf[len] = 0;
            // The command name is enclosed in parentheses and may itself contain ')',
            // so look for the last closing parenthesis
            char* s = strrchr(buf, ')');
            if (s != NULL) {
                // Read 18th integer field after the command name
                int field = 0;
                while (*s != 0 && (*s != ' ' || ++field < 18)) {
                    s++;
                }
                // *s == 0 means the line ended before the 18th field
                if (*s != 0) {
                    thread_count = atoi(s + 1);
                }
            }
        }

        close(fd);
        return thread_count;
    }

  public:
    LinuxThreadList() {
        _dir = opendir("/proc/self/task");
        _thread_count = -1;
    }

    ~LinuxThreadList() {
        if (_dir != NULL) {
            closedir(_dir);
        }
    }

    // Restarts the iteration and invalidates the cached thread count
    void rewind() {
        if (_dir != NULL) {
            rewinddir(_dir);
        }
        _thread_count = -1;
    }

    // Returns the next thread id, or -1 when the directory is exhausted or unavailable
    int next() {
        if (_dir != NULL) {
            struct dirent* entry;
            while ((entry = readdir(_dir)) != NULL) {
                // Skip "." and ".."
                if (entry->d_name[0] != '.') {
                    return atoi(entry->d_name);
                }
            }
        }
        return -1;
    }

    // Returns the number of threads, reading it on first use
    int size() {
        if (_thread_count < 0) {
            _thread_count = getThreadCount();
        }
        return _thread_count;
    }
};
// Returns the CLOCK_MONOTONIC time in nanoseconds.
u64 OS::nanotime() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    u64 whole_seconds_ns = (u64)now.tv_sec * 1000000000;
    return whole_seconds_ns + now.tv_nsec;
}
// Returns the gettimeofday() time converted to milliseconds.
u64 OS::millis() {
    struct timeval now;
    gettimeofday(&now, NULL);

    u64 whole_seconds_ms = (u64)now.tv_sec * 1000;
    return whole_seconds_ms + now.tv_usec / 1000;
}
// Returns the modification time of /proc/<pid> in milliseconds, which is used as
// the process start time. The value is cached after the first successful lookup;
// 0 is returned while the lookup fails.
u64 OS::processStartTime() {
    static u64 start_time = 0;
    if (start_time != 0) {
        return start_time;
    }

    char proc_path[64];
    sprintf(proc_path, "/proc/%d", processId());

    struct stat info;
    if (stat(proc_path, &info) == 0) {
        u64 whole_seconds_ms = (u64)info.st_mtim.tv_sec * 1000;
        start_time = whole_seconds_ms + info.st_mtim.tv_nsec / 1000000;
    }
    return start_time;
}
// Converts a 64-bit value from host to network (big-endian) byte order.
u64 OS::hton64(u64 x) {
    // htonl(1) == 1 only when the host is already big-endian
    if (htonl(1) == 1) {
        return x;
    }
    return bswap_64(x);
}
// Converts a 64-bit value from network (big-endian) to host byte order.
u64 OS::ntoh64(u64 x) {
    // ntohl(1) == 1 only when the host is already big-endian
    if (ntohl(1) == 1) {
        return x;
    }
    return bswap_64(x);
}
// Returns the value of /proc/sys/kernel/pid_max, or 65536 if the file cannot be opened.
int OS::getMaxThreadId() {
    // Pre-filled with the fallback; the remaining bytes are zero,
    // so the text stays terminated after reading at most sizeof - 1 bytes
    char text[16] = "65536";

    int fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
    if (fd == -1) {
        return atoi(text);
    }

    ssize_t bytes_read = read(fd, text, sizeof(text) - 1);
    (void) bytes_read;
    close(fd);
    return atoi(text);
}
// Returns the process id; getpid() is called once and the result is cached.
int OS::processId() {
static const int self_pid = getpid();
return self_pid;
}
// Returns the kernel thread id of the calling thread (gettid system call).
int OS::threadId() {
return syscall(__NR_gettid);
}
// Reads the name of a thread from /proc/self/task/<tid>/comm into name_buf.
// Returns false if the file cannot be opened or nothing could be read.
bool OS::threadName(int thread_id, char* name_buf, size_t name_len) {
    char comm_path[64];
    sprintf(comm_path, "/proc/self/task/%d/comm", thread_id);

    int fd = open(comm_path, O_RDONLY);
    if (fd == -1) {
        return false;
    }

    ssize_t bytes_read = read(fd, name_buf, name_len);
    close(fd);

    if (bytes_read <= 0) {
        return false;
    }

    // Overwrite the last byte read with the string terminator
    name_buf[bytes_read - 1] = 0;
    return true;
}
// Determines the state of a thread from /proc/self/task/<tid>/stat.
// Returns THREAD_INVALID if the file cannot be opened or read, THREAD_RUNNING if
// the state character is 'R' or 'D', and THREAD_SLEEPING otherwise.
ThreadState OS::threadState(int thread_id) {
    char buf[512];
    sprintf(buf, "/proc/self/task/%d/stat", thread_id);
    int fd = open(buf, O_RDONLY);
    if (fd == -1) {
        return THREAD_INVALID;
    }

    ThreadState state = THREAD_INVALID;
    // Leave room for the terminator required by strrchr
    ssize_t len = read(fd, buf, sizeof(buf) - 1);
    if (len > 0) {
        buf[len] = 0;
        // The thread name is enclosed in parentheses and may itself contain ')',
        // so the state character is the one following the last ") "
        char* s = strrchr(buf, ')');
        // s[1] != 0 guarantees that s[2] is still inside the terminated string
        state = s != NULL && s[1] != 0 && (s[2] == 'R' || s[2] == 'D') ? THREAD_RUNNING : THREAD_SLEEPING;
    }

    close(fd);
    return state;
}
// Returns a new heap-allocated thread list; the caller is responsible for deleting it.
ThreadList* OS::listThreads() {
return new LinuxThreadList();
}
// Always false on Linux.
bool OS::isJavaLibraryVisible() {
return false;
}
// Installs a handler for signo with an empty signal mask.
// When `handler` is given, it is installed as a plain handler without flags;
// otherwise `action` is installed with SA_SIGINFO | SA_RESTART.
void OS::installSignalHandler(int signo, SigAction action, SigHandler handler) {
    struct sigaction sa;
    sigemptyset(&sa.sa_mask);

    if (handler == NULL) {
        sa.sa_sigaction = action;
        sa.sa_flags = SA_SIGINFO | SA_RESTART;
    } else {
        sa.sa_handler = handler;
        sa.sa_flags = 0;
    }

    sigaction(signo, &sa, NULL);
}
// Sends signo to a thread of the current process with the tgkill system call.
// Returns true if the call succeeded.
bool OS::sendSignalToThread(int thread_id, int signo) {
    long rc = syscall(__NR_tgkill, processId(), thread_id, signo);
    return rc == 0;
}
// Maps size bytes of private anonymous read-write memory.
// Returns NULL if the system call reports an error.
void* OS::safeAlloc(size_t size) {
    // Naked syscall can be used inside a signal handler.
    // Also, we don't want to catch our own calls when profiling mmap.
    intptr_t result = syscall(MMAP_SYSCALL, NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    // Values in the range (-4096, 0) are treated as error codes
    bool failed = result < 0 && result > -4096;
    return failed ? (void*)NULL : (void*)result;
}
// Unmaps memory obtained from safeAlloc() using the raw munmap system call.
void OS::safeFree(void* addr, size_t size) {
syscall(__NR_munmap, addr, size);
}
// Creates a CLOCK_MONOTONIC timer that calls callback(arg) via SIGEV_THREAD
// notification, first after `interval` nanoseconds and then periodically.
// Returns the timer id cast to Timer*, or NULL if the timer cannot be created.
Timer* OS::startTimer(u64 interval, TimerCallback callback, void* arg) {
    struct sigevent sev;
    sev.sigev_notify = SIGEV_THREAD;
    sev.sigev_notify_function = (void (*)(union sigval)) callback;
    sev.sigev_notify_attributes = NULL;
    sev.sigev_value.sival_ptr = arg;

    timer_t timer;
    if (timer_create(CLOCK_MONOTONIC, &sev, &timer) != 0) {
        return NULL;
    }

    // The initial expiration and the period are both equal to the interval
    struct itimerspec spec;
    spec.it_value.tv_sec = interval / 1000000000;
    spec.it_value.tv_nsec = interval % 1000000000;
    spec.it_interval = spec.it_value;
    timer_settime(timer, 0, &spec, NULL);

    return (Timer*)timer;
}
// Deletes a timer created by startTimer(); the handle is cast back to timer_t.
void OS::stopTimer(Timer* timer) {
timer_delete((timer_t)timer);
}
// Reads the beginning of /proc/cpuinfo into buf and cuts the text at the first
// empty line. If no empty line is found, the text is cut at the start of the
// last (unterminated) line. Returns false if the file cannot be opened or read.
bool OS::getCpuDescription(char* buf, size_t size) {
    int fd = open("/proc/cpuinfo", O_RDONLY);
    if (fd == -1) {
        return false;
    }

    ssize_t r = read(fd, buf, size);
    close(fd);
    if (r <= 0) {
        return false;
    }

    // Terminate the text; the last byte is sacrificed when the buffer is full
    size_t end = (size_t)r < size ? (size_t)r : size - 1;
    buf[end] = 0;

    // Advance line by line until a line starts with '\n' or no newline is left
    char* line = buf;
    for (;;) {
        char* newline = strchr(line, '\n');
        if (newline == NULL) {
            break;
        }
        line = newline + 1;
        if (*line == '\n') {
            break;
        }
    }
    *line = 0;
    return true;
}
// Stores the user and system CPU time of the process as reported by times()
// and returns the value returned by times() itself.
u64 OS::getProcessCpuTime(u64* utime, u64* stime) {
    struct tms usage;
    clock_t elapsed = times(&usage);

    *utime = usage.tms_utime;
    *stime = usage.tms_stime;
    return elapsed;
}
// Parses the first line of /proc/stat.
// On success stores user + nice in *utime and system in *stime, and returns
// user + nice + system + idle. Returns (u64)-1 if the file cannot be opened,
// read or parsed; *utime and *stime are left untouched in that case.
u64 OS::getTotalCpuTime(u64* utime, u64* stime) {
    int fd = open("/proc/stat", O_RDONLY);
    if (fd == -1) {
        return (u64)-1;
    }

    u64 real = (u64)-1;
    char buf[512];
    // Leave room for the terminator: sscanf requires a NUL-terminated string
    ssize_t len = read(fd, buf, sizeof(buf) - 1);
    if (len >= 12) {
        buf[len] = 0;
        u64 user, nice, system, idle;
        // Skip the 4-character label at the start of the line
        if (sscanf(buf + 4, "%llu %llu %llu %llu", &user, &nice, &system, &idle) == 4) {
            *utime = user + nice;
            *stime = system;
            real = user + nice + system + idle;
        }
    }

    close(fd);
    return real;
}
#endif // __linux__

258
src/os_macos.cpp Normal file
View File

@@ -0,0 +1,258 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef __APPLE__
#include <dispatch/dispatch.h>
#include <libkern/OSByteOrder.h>
#include <libproc.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/mach_time.h>
#include <mach/processor_info.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/times.h>
#include "os.h"
// ThreadList implementation based on the Mach task_threads() call.
// The thread array is fetched lazily and released by rewind().
class MacThreadList : public ThreadList {
  private:
    task_t _task;
    thread_array_t _thread_array;
    unsigned int _thread_count;
    unsigned int _thread_index;

    // Fetches the thread array and resets the iteration unless already fetched
    void ensureThreadArray() {
        if (_thread_array != NULL) {
            return;
        }
        _thread_count = 0;
        _thread_index = 0;
        task_threads(_task, &_thread_array, &_thread_count);
    }

  public:
    MacThreadList() : _task(mach_task_self()), _thread_array(NULL) {
    }

    ~MacThreadList() {
        rewind();
    }

    // Releases the thread ports and the array; the next access fetches a fresh list
    void rewind() {
        if (_thread_array == NULL) {
            return;
        }
        for (unsigned int i = 0; i < _thread_count; i++) {
            mach_port_deallocate(_task, _thread_array[i]);
        }
        vm_deallocate(_task, (vm_address_t)_thread_array, _thread_count * sizeof(thread_t));
        _thread_array = NULL;
    }

    // Returns the next thread port as an int, or -1 at the end of the list
    int next() {
        ensureThreadArray();
        if (_thread_index >= _thread_count) {
            return -1;
        }
        return (int)_thread_array[_thread_index++];
    }

    int size() {
        ensureThreadArray();
        return _thread_count;
    }
};
// Ratio for converting mach_absolute_time() ticks to nanoseconds.
// denom == 0 means the ratio has not been queried yet.
static mach_timebase_info_data_t timebase = {0, 0};

// Returns the current mach_absolute_time() reading scaled by the timebase ratio
u64 OS::nanotime() {
    if (timebase.denom == 0) {
        mach_timebase_info(&timebase);
    }
    u64 ticks = (u64)mach_absolute_time();
    return ticks * timebase.numer / timebase.denom;
}
// Returns the wall clock time from gettimeofday() in milliseconds
u64 OS::millis() {
    struct timeval now;
    gettimeofday(&now, NULL);
    u64 seconds_as_ms = (u64)now.tv_sec * 1000;
    return seconds_as_ms + now.tv_usec / 1000;
}
// Returns the start time of the current process in milliseconds.
// The value is cached after the first successful query; 0 is returned
// (and the query retried next time) if proc_pidinfo() fails.
u64 OS::processStartTime() {
    static u64 start_time = 0;
    if (start_time != 0) {
        return start_time;
    }

    struct proc_bsdinfo bsd_info;
    int bytes = proc_pidinfo(processId(), PROC_PIDTBSDINFO, 0, &bsd_info, sizeof(bsd_info));
    if (bytes > 0) {
        start_time = (u64)bsd_info.pbi_start_tvsec * 1000 + bsd_info.pbi_start_tvusec / 1000;
    }
    return start_time;
}
// Converts a 64-bit value from host byte order to big-endian
u64 OS::hton64(u64 x) {
    u64 big_endian = OSSwapHostToBigInt64(x);
    return big_endian;
}
// Converts a 64-bit value from big-endian to host byte order
u64 OS::ntoh64(u64 x) {
    u64 host_order = OSSwapBigToHostInt64(x);
    return host_order;
}
// Returns the upper bound for thread ids on this platform: the largest 32-bit signed value
int OS::getMaxThreadId() {
    const int max_thread_id = 0x7fffffff;
    return max_thread_id;
}
// Returns the id of the current process; getpid() is called once and the result is cached
int OS::processId() {
    static const int cached_pid = getpid();
    return cached_pid;
}
// Returns the Mach port name of the calling thread as an int
int OS::threadId() {
    // pthread_mach_thread_np(pthread_self()) was used here before,
    // but pthread_mach_thread_np is not async signal safe
    mach_port_t self_port = mach_thread_self();
    // Release the reference right away; only the port name is needed
    mach_port_deallocate(mach_task_self(), self_port);
    return (int)self_port;
}
// Copies the name of the given thread into name_buf (at most name_len bytes).
// Returns true only if the thread was found and its name is not empty.
bool OS::threadName(int thread_id, char* name_buf, size_t name_len) {
    pthread_t pthread = pthread_from_mach_thread_np(thread_id);
    if (!pthread) {
        return false;
    }
    if (pthread_getname_np(pthread, name_buf, name_len) != 0) {
        return false;
    }
    return name_buf[0] != 0;
}
// Queries the run state of the given Mach thread.
// Returns THREAD_INVALID if thread_info() fails, THREAD_RUNNING if the thread
// is in TH_STATE_RUNNING, and THREAD_SLEEPING for any other state.
ThreadState OS::threadState(int thread_id) {
    struct thread_basic_info info;
    // thread_info() expects the buffer capacity as a number of integer_t elements,
    // not as a size in bytes
    mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
    if (thread_info((thread_act_t)thread_id, THREAD_BASIC_INFO, (thread_info_t)&info, &count) != KERN_SUCCESS) {
        return THREAD_INVALID;
    }
    return info.run_state == TH_STATE_RUNNING ? THREAD_RUNNING : THREAD_SLEEPING;
}
// Creates a new thread enumerator; the caller owns the returned object
ThreadList* OS::listThreads() {
    ThreadList* thread_list = new MacThreadList();
    return thread_list;
}
// Unconditionally true on macOS
bool OS::isJavaLibraryVisible() {
    const bool visible = true;
    return visible;
}
// Installs a handler for signal `signo` with an empty signal mask.
// If `handler` is given, it is installed as a plain handler with no flags;
// otherwise `action` is installed as an SA_SIGINFO handler with SA_RESTART.
void OS::installSignalHandler(int signo, SigAction action, SigHandler handler) {
    struct sigaction sa;
    sigemptyset(&sa.sa_mask);

    if (handler == NULL) {
        sa.sa_sigaction = action;
        sa.sa_flags = SA_SIGINFO | SA_RESTART;
    } else {
        sa.sa_handler = handler;
        sa.sa_flags = 0;
    }

    sigaction(signo, &sa, NULL);
}
// Sends signal `signo` to the thread identified by `thread_id` by issuing a raw
// system call. Returns true if the call leaves 0 in the result register.
// NOTE(review): x86-64 only - the inline assembly will not build for other architectures
bool OS::sendSignalToThread(int thread_id, int signo) {
int result;
// Call number goes in rax ("a"), arguments in rdi ("D") and rsi ("S"); the result comes
// back in rax. rcx and r11 are listed as clobbers because `syscall` overwrites them.
// NOTE(review): 0x2000148 is presumably the number of the __pthread_kill syscall - confirm
asm volatile("syscall"
: "=a" (result)
: "a" (0x2000148), "D" (thread_id), "S" (signo)
: "rcx", "r11", "memory");
return result == 0;
}
// Allocates `size` bytes of private anonymous read-write memory.
// Returns NULL if the mapping cannot be created.
void* OS::safeAlloc(size_t size) {
    // mmap() is not formally async signal safe, but it is in practice,
    // and there is no reasonable alternative anyway.
    void* mapping = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return mapping == MAP_FAILED ? NULL : mapping;
}
// Unmaps `size` bytes starting at `addr`; the counterpart of safeAlloc()
void OS::safeFree(void* addr, size_t size) {
    (void)munmap(addr, size);
}
// Starts a repeating dispatch timer on the global utility queue.
// `callback` is invoked with `arg` as its context; the first shot and every
// following period are `interval` apart (passed unchanged to libdispatch).
// Returns an opaque timer handle for stopTimer(), or NULL if the timer
// source cannot be created.
Timer* OS::startTimer(u64 interval, TimerCallback callback, void* arg) {
    dispatch_source_t timer_source = dispatch_source_create(
        DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));
    if (timer_source == NULL) {
        return (Timer*)timer_source;
    }

    dispatch_time_t first_shot = dispatch_time(DISPATCH_TIME_NOW, interval);
    dispatch_source_set_timer(timer_source, first_shot, interval, 0);
    dispatch_source_set_event_handler_f(timer_source, callback);
    dispatch_set_context(timer_source, arg);
    dispatch_resume(timer_source);

    return (Timer*)timer_source;
}
// Cancels and releases a timer created by startTimer(); NULL is ignored
void OS::stopTimer(Timer* timer) {
    dispatch_source_t timer_source = (dispatch_source_t)timer;
    if (timer_source == NULL) {
        return;
    }
    dispatch_source_cancel(timer_source);
    dispatch_release(timer_source);
}
// Reads the "machdep.cpu.brand_string" sysctl into buf (capacity `size` bytes).
// Returns true if the sysctl call succeeds.
bool OS::getCpuDescription(char* buf, size_t size) {
    int rc = sysctlbyname("machdep.cpu.brand_string", buf, &size, NULL, 0);
    return rc == 0;
}
// Stores the user and system CPU time of the current process, as reported
// by times(), into *utime and *stime. Returns the value of times() itself.
u64 OS::getProcessCpuTime(u64* utime, u64* stime) {
    struct tms process_times;
    clock_t elapsed = times(&process_times);

    *utime = process_times.tms_utime;
    *stime = process_times.tms_stime;
    return elapsed;
}
// Sums the CPU load ticks of all processors.
// Stores user+nice ticks into *utime and system ticks into *stime.
// Returns user + system + idle ticks, or (u64)-1 if the host query fails
// (in which case *utime and *stime are left untouched).
u64 OS::getTotalCpuTime(u64* utime, u64* stime) {
    natural_t ncpu;
    processor_info_array_t info_array;
    mach_msg_type_number_t info_count;

    host_name_port_t host_port = mach_host_self();
    kern_return_t kr = host_processor_info(host_port, PROCESSOR_CPU_LOAD_INFO, &ncpu, &info_array, &info_count);
    mach_port_deallocate(mach_task_self(), host_port);
    if (kr != 0) {
        return (u64)-1;
    }

    u64 user_ticks = 0;
    u64 system_ticks = 0;
    u64 idle_ticks = 0;

    const processor_cpu_load_info_data_t* loads = (const processor_cpu_load_info_data_t*)info_array;
    for (const processor_cpu_load_info_data_t* cpu = loads; cpu != loads + ncpu; cpu++) {
        user_ticks += cpu->cpu_ticks[CPU_STATE_USER] + cpu->cpu_ticks[CPU_STATE_NICE];
        system_ticks += cpu->cpu_ticks[CPU_STATE_SYSTEM];
        idle_ticks += cpu->cpu_ticks[CPU_STATE_IDLE];
    }

    // The info array is allocated by the kernel in our address space
    vm_deallocate(mach_task_self(), (vm_address_t)info_array, info_count * sizeof(int));

    *utime = user_ticks;
    *stime = system_ticks;
    return user_ticks + system_ticks + idle_ticks;
}
#endif // __APPLE__

30
src/perfEvents.h Executable file → Normal file
View File

@@ -17,7 +17,6 @@
#ifndef _PERFEVENTS_H
#define _PERFEVENTS_H
#include <jvmti.h>
#include <signal.h>
#include "engine.h"
@@ -31,12 +30,10 @@ class PerfEvents : public Engine {
static PerfEvent* _events;
static PerfEventType* _event_type;
static long _interval;
static Ring _ring;
static CStack _cstack;
static bool _print_extended_warning;
static void createForThread(int tid);
static void createForAllThreads();
static void destroyForThread(int tid);
static void destroyForAllThreads();
static void installSignalHandler();
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
public:
@@ -44,21 +41,20 @@ class PerfEvents : public Engine {
return "perf";
}
Error start(const char* event, long interval);
const char* units();
Error check(Arguments& args);
Error start(Arguments& args);
void stop();
static void init();
static int tid();
static const char** getAvailableEvents();
static int getCallChain(int tid, const void** callchain, int max_depth);
int getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
CodeCache* java_methods, CodeCache* runtime_stubs);
static void JNICALL ThreadStart(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
createForThread(tid());
}
static bool supported();
static const char* getEventName(int event_id);
static void JNICALL ThreadEnd(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
destroyForThread(tid());
}
static bool createForThread(int tid);
static void destroyForThread(int tid);
};
#endif // _PERFEVENTS_H

561
src/perfEvents_linux.cpp Executable file → Normal file
View File

@@ -16,20 +16,27 @@
#ifdef __linux__
#include <jvmti.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "arch.h"
#include "os.h"
#include "perfEvents.h"
#include "profiler.h"
#include "spinLock.h"
#include "stackFrame.h"
#include "symbols.h"
// Ancient fcntl.h does not define F_SETOWN_EX constants and structures
@@ -44,22 +51,21 @@ struct f_owner_ex {
#endif // F_SETOWN_EX
static int getMaxPID() {
char buf[16] = "65536";
int fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
if (fd != -1) {
ssize_t r = read(fd, buf, sizeof(buf) - 1);
(void) r;
close(fd);
}
return atoi(buf);
}
// Access types of a hardware breakpoint (read, write, read-write, execute);
// the selected value ends up in perf_event_attr.bp_type
enum {
HW_BREAKPOINT_R = 1,
HW_BREAKPOINT_W = 2,
HW_BREAKPOINT_RW = 3,
HW_BREAKPOINT_X = 4
};
static const unsigned long PERF_PAGE_SIZE = sysconf(_SC_PAGESIZE);
// Get perf_event_attr.config numeric value of the given tracepoint name
// by reading /sys/kernel/debug/tracing/events/<name>/id file
static int getTracepointId(const char* name) {
static int findTracepointId(const char* name) {
char buf[256];
if (snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/id", name) >= sizeof(buf)) {
if ((size_t)snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/id", name) >= sizeof(buf)) {
return 0;
}
@@ -78,34 +84,138 @@ static int getTracepointId(const char* name) {
}
// Associates a function name with the 1-based index of the argument
// that serves as a profiling counter for that function
struct FunctionWithCounter {
const char* name;
int counter_arg;
};
struct PerfEventType {
const char* name;
long default_interval;
__u32 precise_ip;
__u32 type;
__u64 config;
__u32 bp_type;
__u32 bp_len;
int counter_arg;
static PerfEventType AVAILABLE_EVENTS[];
static PerfEventType KERNEL_TRACEPOINT;
static FunctionWithCounter KNOWN_FUNCTIONS[];
// Find which argument of a known function serves as a profiling counter,
// e.g. the first argument of malloc() is allocation size
static int findCounterArg(const char* name) {
for (FunctionWithCounter* func = KNOWN_FUNCTIONS; func->name != NULL; func++) {
if (strcmp(name, func->name) == 0) {
return func->counter_arg;
}
}
return 0;
}
// Returns the first entry of AVAILABLE_EVENTS with the given perf event type.
// The scan has no end check, so `type` must be present in the table.
static PerfEventType* findByType(__u32 type) {
    PerfEventType* event = AVAILABLE_EVENTS;
    while (event->type != type) {
        event++;
    }
    return event;
}
// Builds a hardware breakpoint event from a textual description.
// Breakpoint format: func[+offset][/len][:rwx]
//   name    - description; only its first 255 characters are considered
//   bp_type - access type to use when there is no [:rwx] suffix
//   bp_len  - breakpoint length to use when there is no [/len] part
// Returns the PERF_TYPE_BREAKPOINT entry of AVAILABLE_EVENTS filled in with
// the resolved address, or NULL if the address resolves to 0.
static PerfEventType* getBreakpoint(const char* name, __u32 bp_type, __u32 bp_len) {
    char buf[256];
    strncpy(buf, name, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = 0;

    // Parse access type [:rwx]
    // The colon must not be the first character of the buffer (c[-1] would be
    // out of bounds) and must not be preceded by another colon
    // (presumably so that "::" inside a symbol name is not taken for the suffix).
    char* c = strrchr(buf, ':');
    if (c != NULL && c != buf && c[-1] != ':') {
        *c++ = 0;
        if (strcmp(c, "r") == 0) {
            bp_type = HW_BREAKPOINT_R;
        } else if (strcmp(c, "w") == 0) {
            bp_type = HW_BREAKPOINT_W;
        } else if (strcmp(c, "x") == 0) {
            bp_type = HW_BREAKPOINT_X;
            bp_len = sizeof(long);
        } else {
            bp_type = HW_BREAKPOINT_RW;
        }
    }

    // Parse length [/8]
    c = strrchr(buf, '/');
    if (c != NULL) {
        *c++ = 0;
        bp_len = (__u32)strtol(c, NULL, 0);
    }

    // Parse offset [+0x1234]
    long long offset = 0;
    c = strrchr(buf, '+');
    if (c != NULL) {
        *c++ = 0;
        offset = strtoll(c, NULL, 0);
    }

    // Parse symbol or absolute address
    __u64 addr;
    if (strncmp(buf, "0x", 2) == 0) {
        // Unsigned conversion: addresses above LLONG_MAX must not be clamped
        addr = (__u64)strtoull(buf, NULL, 0);
    } else {
        addr = (__u64)(uintptr_t)dlsym(RTLD_DEFAULT, buf);
        if (addr == 0) {
            addr = (__u64)(uintptr_t)Profiler::_instance.resolveSymbol(buf);
        }
    }

    if (addr == 0) {
        return NULL;
    }

    PerfEventType* breakpoint = findByType(PERF_TYPE_BREAKPOINT);
    breakpoint->config = addr + offset;
    breakpoint->bp_type = bp_type;
    breakpoint->bp_len = bp_len;
    // Only execution breakpoints can take the counter from a function argument
    breakpoint->counter_arg = bp_type == HW_BREAKPOINT_X ? findCounterArg(buf) : 0;
    return breakpoint;
}
// Stores the given tracepoint id in the PERF_TYPE_TRACEPOINT entry
// of AVAILABLE_EVENTS and returns that entry
static PerfEventType* getTracepoint(int tracepoint_id) {
    PerfEventType* entry = findByType(PERF_TYPE_TRACEPOINT);
    entry->config = tracepoint_id;
    return entry;
}
static PerfEventType* forName(const char* name) {
// First, look through the table of predefined perf events
// Hardware breakpoint
if (strncmp(name, "mem:", 4) == 0) {
return getBreakpoint(name + 4, HW_BREAKPOINT_RW, 1);
}
// Raw tracepoint ID
if (strncmp(name, "trace:", 6) == 0) {
int tracepoint_id = atoi(name + 6);
return tracepoint_id > 0 ? getTracepoint(tracepoint_id) : NULL;
}
// Look through the table of predefined perf events
for (PerfEventType* event = AVAILABLE_EVENTS; event->name != NULL; event++) {
if (strcmp(name, event->name) == 0) {
return event;
}
}
// Second, try kernel tracepoints defined in debugfs
if (strchr(name, ':') != NULL) {
int tracepoint_id = getTracepointId(name);
// Kernel tracepoints defined in debugfs
const char* c = strchr(name, ':');
if (c != NULL && c[1] != ':') {
int tracepoint_id = findTracepointId(name);
if (tracepoint_id > 0) {
KERNEL_TRACEPOINT.config = tracepoint_id;
return &KERNEL_TRACEPOINT;
return getTracepoint(tracepoint_id);
}
}
return NULL;
// Finally, treat event as a function name and return an execution breakpoint
return getBreakpoint(name, HW_BREAKPOINT_X, sizeof(long));
}
};
@@ -114,26 +224,39 @@ struct PerfEventType {
((perf_hw_cache_id) | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
PerfEventType PerfEventType::AVAILABLE_EVENTS[] = {
{"cpu", 1000000, 2, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
{"page-faults", 1, 2, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
{"context-switches", 1, 2, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},
{"cpu", DEFAULT_INTERVAL, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
{"page-faults", 1, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
{"context-switches", 1, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},
{"cycles", 1000000, 2, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
{"instructions", 1000000, 2, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
{"cache-references", 1000000, 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES},
{"cache-misses", 1000, 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES},
{"branches", 1000000, 2, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
{"branch-misses", 1000, 2, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
{"bus-cycles", 1000000, 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES},
{"cycles", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
{"instructions", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
{"cache-references", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES},
{"cache-misses", 1000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES},
{"branches", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
{"branch-misses", 1000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
{"bus-cycles", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES},
{"L1-dcache-load-misses", 1000000, 0, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_L1D)},
{"LLC-load-misses", 1000, 0, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_LL)},
{"dTLB-load-misses", 1000, 0, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_DTLB)},
{"L1-dcache-load-misses", 1000000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_L1D)},
{"LLC-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_LL)},
{"dTLB-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_DTLB)},
{"mem:breakpoint", 1, PERF_TYPE_BREAKPOINT, 0},
{"trace:tracepoint", 1, PERF_TYPE_TRACEPOINT, 0},
{NULL}
};
PerfEventType PerfEventType::KERNEL_TRACEPOINT = {"tracepoint", 1, 0, PERF_TYPE_TRACEPOINT, 0};
// Functions whose argument can serve as a profiling counter, together with
// the 1-based index of that argument. The list is terminated by a NULL name.
FunctionWithCounter PerfEventType::KNOWN_FUNCTIONS[] = {
{"malloc", 1},
{"mmap", 2},
{"read", 3},
{"write", 3},
{"send", 3},
{"recv", 3},
{"sendto", 3},
{"recvfrom", 3},
{NULL}
};
class RingBuffer {
@@ -143,18 +266,23 @@ class RingBuffer {
public:
RingBuffer(struct perf_event_mmap_page* page) {
_start = (const char*)page + PAGE_SIZE;
_start = (const char*)page + PERF_PAGE_SIZE;
}
struct perf_event_header* seek(u64 offset) {
_offset = (unsigned long)offset & PAGE_MASK;
_offset = (unsigned long)offset & (PERF_PAGE_SIZE - 1);
return (struct perf_event_header*)(_start + _offset);
}
u64 next() {
_offset = (_offset + sizeof(u64)) & PAGE_MASK;
_offset = (_offset + sizeof(u64)) & (PERF_PAGE_SIZE - 1);
return *(u64*)(_start + _offset);
}
u64 peek(unsigned long words) {
unsigned long peek_offset = (_offset + words * sizeof(u64)) & (PERF_PAGE_SIZE - 1);
return *(u64*)(_start + peek_offset);
}
};
@@ -171,42 +299,85 @@ int PerfEvents::_max_events = 0;
PerfEvent* PerfEvents::_events = NULL;
PerfEventType* PerfEvents::_event_type = NULL;
long PerfEvents::_interval;
Ring PerfEvents::_ring;
CStack PerfEvents::_cstack;
bool PerfEvents::_print_extended_warning;
void PerfEvents::init() {
_max_events = getMaxPID();
_events = (PerfEvent*)calloc(_max_events, sizeof(PerfEvent));
}
bool PerfEvents::createForThread(int tid) {
if (tid >= _max_events) {
fprintf(stderr, "WARNING: tid[%d] > pid_max[%d]. Restart profiler after changing pid_max\n", tid, _max_events);
return false;
}
int PerfEvents::tid() {
return syscall(__NR_gettid);
}
PerfEventType* event_type = _event_type;
if (event_type == NULL) {
return false;
}
void PerfEvents::createForThread(int tid) {
struct perf_event_attr attr = {0};
attr.size = sizeof(attr);
attr.type = _event_type->type;
attr.config = _event_type->config;
attr.precise_ip = _event_type->precise_ip;
attr.type = event_type->type;
if (attr.type == PERF_TYPE_BREAKPOINT) {
attr.bp_addr = event_type->config;
attr.bp_type = event_type->bp_type;
attr.bp_len = event_type->bp_len;
} else {
attr.config = event_type->config;
}
// Hardware events may not always support zero skid
if (attr.type == PERF_TYPE_SOFTWARE) {
attr.precise_ip = 2;
}
attr.sample_period = _interval;
attr.sample_type = PERF_SAMPLE_CALLCHAIN;
attr.disabled = 1;
attr.wakeup_events = 1;
attr.exclude_idle = 1;
if (_ring == RING_USER) {
attr.exclude_kernel = 1;
} else if (_ring == RING_KERNEL) {
attr.exclude_user = 1;
}
#ifdef PERF_ATTR_SIZE_VER5
if (_cstack == CSTACK_LBR) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_REGS_USER;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK;
attr.sample_regs_user = 1ULL << PERF_REG_PC;
attr.exclude_callchain_user = 1;
}
#else
#warning "Compiling without LBR support. Kernel headers 4.1+ required"
#endif
int fd = syscall(__NR_perf_event_open, &attr, tid, -1, -1, 0);
if (fd == -1) {
int err = errno;
perror("perf_event_open failed");
return;
if (err == EACCES && _print_extended_warning) {
fprintf(stderr, "Due to permission restrictions, you cannot collect kernel events.\n"
"Try with --all-user option, or 'echo 1 > /proc/sys/kernel/perf_event_paranoid'\n");
_print_extended_warning = false;
}
return false;
}
void* page = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (!__sync_bool_compare_and_swap(&_events[tid]._fd, 0, fd)) {
// Lost race. The event is created either from start() or from onThreadStart()
close(fd);
return false;
}
void* page = mmap(NULL, 2 * PERF_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (page == MAP_FAILED) {
perror("perf_event mmap failed");
page = NULL;
}
_events[tid].reset();
_events[tid]._fd = fd;
_events[tid]._page = (struct perf_event_mmap_page*)page;
struct f_owner_ex ex;
@@ -219,106 +390,172 @@ void PerfEvents::createForThread(int tid) {
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);
}
void PerfEvents::createForAllThreads() {
DIR* dir = opendir("/proc/self/task");
if (dir == NULL) return;
struct dirent* entry;
while ((entry = readdir(dir)) != NULL) {
if (entry->d_name[0] != '.') {
int tid = atoi(entry->d_name);
createForThread(tid);
}
}
closedir(dir);
return true;
}
void PerfEvents::destroyForThread(int tid) {
if (tid >= _max_events) {
return;
}
PerfEvent* event = &_events[tid];
if (event->_fd != 0) {
ioctl(event->_fd, PERF_EVENT_IOC_DISABLE, 0);
close(event->_fd);
event->_fd = 0;
int fd = event->_fd;
if (fd != 0 && __sync_bool_compare_and_swap(&event->_fd, fd, 0)) {
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
close(fd);
}
if (event->_page != NULL) {
event->lock();
munmap(event->_page, 2 * PAGE_SIZE);
munmap(event->_page, 2 * PERF_PAGE_SIZE);
event->_page = NULL;
event->unlock();
}
}
void PerfEvents::destroyForAllThreads() {
// Signal handler for perf event notifications.
// When profiling is enabled, records one sample weighted by `counter`;
// in any case resets and re-arms the perf event referred to by siginfo->si_fd.
void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
if (siginfo->si_code <= 0) {
// Looks like an external signal; don't treat as a profiling event
return;
}
if (_enabled) {
u64 counter;
// counter_arg is 1-based: value N takes the sample weight from StackFrame's arg(N-1)
switch (_event_type->counter_arg) {
case 1: counter = StackFrame(ucontext).arg0(); break;
case 2: counter = StackFrame(ucontext).arg1(); break;
case 3: counter = StackFrame(ucontext).arg2(); break;
case 4: counter = StackFrame(ucontext).arg3(); break;
default:
// No argument selected: read the counter value from the perf event descriptor,
// falling back to 1 if the read does not return a full value
if (read(siginfo->si_fd, &counter, sizeof(counter)) != sizeof(counter)) {
counter = 1;
}
}
ExecutionEvent event;
Profiler::_instance.recordSample(ucontext, counter, 0, &event);
}
// Reset the counter and allow one more overflow notification
ioctl(siginfo->si_fd, PERF_EVENT_IOC_RESET, 0);
ioctl(siginfo->si_fd, PERF_EVENT_IOC_REFRESH, 1);
}
// Returns the name of the units in which samples of the current event are counted:
// "ns" when no event is selected or for the cpu event, "events" for breakpoints
// and tracepoints, otherwise the part of the event name after the last dash
// (or the whole name if it has no dash).
const char* PerfEvents::units() {
    // Compare the name by content: equal strings are not guaranteed to share an address
    if (_event_type == NULL || strcmp(_event_type->name, EVENT_CPU) == 0) {
        return "ns";
    }
    if (_event_type->type == PERF_TYPE_BREAKPOINT || _event_type->type == PERF_TYPE_TRACEPOINT) {
        return "events";
    }

    const char* dash = strrchr(_event_type->name, '-');
    return dash != NULL ? dash + 1 : _event_type->name;
}
// Checks whether the event described by args can be opened with perf_event_open().
// A disabled event is opened with the event's default interval and closed again
// straight away. Returns Error::OK on success, otherwise an error carrying either
// "Unsupported event type" or the strerror() text of the failed call.
Error PerfEvents::check(Arguments& args) {
PerfEventType* event_type = PerfEventType::forName(args._event_desc);
if (event_type == NULL) {
return Error("Unsupported event type");
}
struct perf_event_attr attr = {0};
attr.size = sizeof(attr);
attr.type = event_type->type;
// For breakpoints, `config` of the event type holds the breakpoint address
if (attr.type == PERF_TYPE_BREAKPOINT) {
attr.bp_addr = event_type->config;
attr.bp_type = event_type->bp_type;
attr.bp_len = event_type->bp_len;
} else {
attr.config = event_type->config;
}
attr.sample_period = event_type->default_interval;
attr.sample_type = PERF_SAMPLE_CALLCHAIN;
attr.disabled = 1;
if (args._ring == RING_USER) {
attr.exclude_kernel = 1;
} else if (args._ring == RING_KERNEL) {
attr.exclude_user = 1;
} else if (!Symbols::haveKernelSymbols()) {
// No ring restriction requested: refresh symbols first, and exclude
// kernel events if kernel symbols are still unavailable afterwards
Profiler::_instance.updateSymbols(true);
attr.exclude_kernel = Symbols::haveKernelSymbols() ? 0 : 1;
}
#ifdef PERF_ATTR_SIZE_VER5
if (args._cstack == CSTACK_LBR) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_REGS_USER;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK;
attr.sample_regs_user = 1ULL << PERF_REG_PC;
attr.exclude_callchain_user = 1;
}
#endif
// Probe with the same target arguments as real events, but for pid 0
int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
if (fd == -1) {
return Error(strerror(errno));
}
close(fd);
return Error::OK;
}
// Starts profiling with the event described by args.
// Resolves the event type, picks the sampling interval, (re)allocates the
// per-thread event table, installs the SIGPROF handler and creates perf
// events for all existing threads. Returns Error::OK if at least one perf
// event was created, otherwise an error describing what went wrong.
Error PerfEvents::start(Arguments& args) {
    _event_type = PerfEventType::forName(args._event_desc);
    if (_event_type == NULL) {
        return Error("Unsupported event type");
    }

    if (args._interval < 0) {
        return Error("interval must be positive");
    }
    // Interval 0 means "use the default of the selected event"
    _interval = args._interval ? args._interval : _event_type->default_interval;

    _ring = args._ring;
    if (_ring != RING_USER && !Symbols::haveKernelSymbols()) {
        fprintf(stderr, "WARNING: Kernel symbols are unavailable due to restrictions. Try\n"
                        " echo 0 > /proc/sys/kernel/kptr_restrict\n"
                        " echo 1 > /proc/sys/kernel/perf_event_paranoid\n");
        _ring = RING_USER;
    }
    _cstack = args._cstack;
    _print_extended_warning = _ring != RING_USER;

    int max_events = OS::getMaxThreadId();
    if (max_events != _max_events) {
        free(_events);
        _events = (PerfEvent*)calloc(max_events, sizeof(PerfEvent));
        if (_events == NULL) {
            // Keep the table size consistent with the (missing) table,
            // so that nothing indexes through a NULL pointer later
            _max_events = 0;
            return Error("Not enough memory to allocate perf events");
        }
        _max_events = max_events;
    }

    OS::installSignalHandler(SIGPROF, signalHandler);

    // Enable thread events before traversing currently running threads
    Profiler::_instance.switchThreadEvents(JVMTI_ENABLE);

    // Create perf_events for all existing threads
    bool created = false;
    ThreadList* thread_list = OS::listThreads();
    for (int tid; (tid = thread_list->next()) != -1; ) {
        created |= createForThread(tid);
    }
    delete thread_list;

    if (!created) {
        Profiler::_instance.switchThreadEvents(JVMTI_DISABLE);
        return Error("Perf events unavailable. See stderr of the target process.");
    }
    return Error::OK;
}
// Destroys the perf events of every slot in the event table, in ascending order
void PerfEvents::stop() {
    int tid = 0;
    while (tid < _max_events) {
        destroyForThread(tid);
        tid++;
    }
}
void PerfEvents::installSignalHandler() {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_handler = NULL;
sa.sa_sigaction = signalHandler;
sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigaction(SIGPROF, &sa, NULL);
}
void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
u64 counter;
if (read(siginfo->si_fd, &counter, sizeof(counter)) != sizeof(counter)) {
counter = 1;
}
Profiler::_instance.recordSample(ucontext, counter, 0, NULL);
ioctl(siginfo->si_fd, PERF_EVENT_IOC_RESET, 0);
ioctl(siginfo->si_fd, PERF_EVENT_IOC_REFRESH, 1);
}
Error PerfEvents::start(const char* event, long interval) {
_event_type = PerfEventType::forName(event);
if (_event_type == NULL) {
return Error("Unsupported event type");
}
if (interval < 0) {
return Error("interval must be positive");
}
_interval = interval ? interval : _event_type->default_interval;
installSignalHandler();
jvmtiEnv* jvmti = VM::jvmti();
jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_THREAD_START, NULL);
jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_THREAD_END, NULL);
createForAllThreads();
return Error::OK;
}
void PerfEvents::stop() {
jvmtiEnv* jvmti = VM::jvmti();
jvmti->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_THREAD_START, NULL);
jvmti->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_THREAD_END, NULL);
destroyForAllThreads();
}
const char** PerfEvents::getAvailableEvents() {
int count = sizeof(PerfEventType::AVAILABLE_EVENTS) / sizeof(PerfEventType);
const char** available_events = new const char*[count];
for (int i = 0; i < count; i++) {
available_events[i] = PerfEventType::AVAILABLE_EVENTS[i].name;
}
return available_events;
}
int PerfEvents::getCallChain(int tid, const void** callchain, int max_depth) {
int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
CodeCache* java_methods, CodeCache* runtime_stubs) {
PerfEvent* event = &_events[tid];
if (!event->tryLock()) {
return 0; // the event is being destroyed
@@ -340,15 +577,49 @@ int PerfEvents::getCallChain(int tid, const void** callchain, int max_depth) {
u64 nr = ring.next();
while (nr-- > 0) {
u64 ip = ring.next();
if (ip < PERF_CONTEXT_MAX && depth < max_depth) {
callchain[depth++] = (const void*)ip;
if (ip < PERF_CONTEXT_MAX) {
const void* iptr = (const void*)ip;
if (java_methods->contains(iptr) || runtime_stubs->contains(iptr) || depth >= max_depth) {
// Stop at the first Java frame
goto stack_complete;
}
callchain[depth++] = iptr;
}
}
if (_cstack == CSTACK_LBR) {
u64 bnr = ring.next();
// Last userspace PC is stored right after branch stack
const void* pc = (const void*)ring.peek(bnr * 3 + 2);
if (java_methods->contains(pc) || runtime_stubs->contains(pc) || depth >= max_depth) {
goto stack_complete;
}
callchain[depth++] = pc;
while (bnr-- > 0) {
const void* from = (const void*)ring.next();
const void* to = (const void*)ring.next();
ring.next();
if (java_methods->contains(to) || runtime_stubs->contains(to) || depth >= max_depth) {
goto stack_complete;
}
callchain[depth++] = to;
if (java_methods->contains(from) || runtime_stubs->contains(from) || depth >= max_depth) {
goto stack_complete;
}
callchain[depth++] = from;
}
}
break;
}
tail += hdr->size;
}
stack_complete:
page->data_tail = head;
}
@@ -356,4 +627,18 @@ int PerfEvents::getCallChain(int tid, const void** callchain, int max_depth) {
return depth;
}
// Tells whether perf_event_open() support is enabled.
// The official way to find out is to check that the file
// /proc/sys/kernel/perf_event_paranoid exists.
bool PerfEvents::supported() {
    struct stat paranoid_stat;
    int rc = stat("/proc/sys/kernel/perf_event_paranoid", &paranoid_stat);
    return rc == 0;
}
// Returns the name of the AVAILABLE_EVENTS entry with the given index,
// or NULL if the index is outside the table
const char* PerfEvents::getEventName(int event_id) {
    const size_t table_size = sizeof(PerfEventType::AVAILABLE_EVENTS) / sizeof(PerfEventType);
    if (event_id < 0 || (size_t)event_id >= table_size) {
        return NULL;
    }
    return PerfEventType::AVAILABLE_EVENTS[event_id].name;
}
#endif // __linux__

80
src/perfEvents_macos.cpp Executable file → Normal file
View File

@@ -16,79 +16,53 @@
#ifdef __APPLE__
#include <string.h>
#include <sys/time.h>
#include <pthread.h>
#include "perfEvents.h"
#include "profiler.h"
int PerfEvents::_max_events;
PerfEvent* PerfEvents::_events;
PerfEventType* PerfEvents::_event_type;
long PerfEvents::_interval;
Ring PerfEvents::_ring;
bool PerfEvents::_print_extended_warning;
void PerfEvents::init() {}
int PerfEvents::tid() {
return pthread_mach_thread_np(pthread_self());
}
void PerfEvents::createForThread(int tid) {}
void PerfEvents::createForAllThreads() {}
void PerfEvents::destroyForThread(int tid) {}
void PerfEvents::destroyForAllThreads() {}
void PerfEvents::installSignalHandler() {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_handler = NULL;
sa.sa_sigaction = signalHandler;
sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigaction(SIGPROF, &sa, NULL);
}
void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
Profiler::_instance.recordSample(ucontext, _interval, 0, NULL);
}
Error PerfEvents::start(const char* event, long interval) {
if (strcmp(event, EVENT_CPU) != 0) {
return Error("Event is not supported on this platform");
}
const char* PerfEvents::units() {
return "ns";
}
if (interval < 0) {
return Error("interval must be positive");
}
_interval = interval ? interval : DEFAULT_INTERVAL;
Error PerfEvents::check(Arguments& args) {
return Error("PerfEvents are unsupported on macOS");
}
installSignalHandler();
long sec = _interval / 1000000000;
long usec = (_interval % 1000000000) / 1000;
struct itimerval tv = {{sec, usec}, {sec, usec}};
setitimer(ITIMER_PROF, &tv, NULL);
return Error::OK;
Error PerfEvents::start(Arguments& args) {
return Error("PerfEvents are unsupported on macOS");
}
void PerfEvents::stop() {
struct itimerval tv = {{0, 0}, {0, 0}};
setitimer(ITIMER_PROF, &tv, NULL);
}
const char** PerfEvents::getAvailableEvents() {
const char** available_events = new const char*[2];
available_events[0] = "cpu";
available_events[1] = NULL;
return available_events;
}
int PerfEvents::getCallChain(int tid, const void** callchain, int max_depth) {
int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
CodeCache* java_methods, CodeCache* runtime_stubs) {
return 0;
}
bool PerfEvents::supported() {
return false;
}
const char* PerfEvents::getEventName(int event_id) {
return NULL;
}
bool PerfEvents::createForThread(int tid) {
return false;
}
void PerfEvents::destroyForThread(int tid) {
}
#endif // __APPLE__

1319
src/profiler.cpp Executable file → Normal file

File diff suppressed because it is too large Load Diff

235
src/profiler.h Executable file → Normal file
View File

@@ -18,73 +18,49 @@
#define _PROFILER_H
#include <iostream>
#include <pthread.h>
#include <map>
#include <time.h>
#include "arch.h"
#include "arguments.h"
#include "engine.h"
#include "spinLock.h"
#include "callTraceStorage.h"
#include "codeCache.h"
#include "dictionary.h"
#include "engine.h"
#include "event.h"
#include "flightRecorder.h"
#include "mutex.h"
#include "spinLock.h"
#include "threadFilter.h"
#include "trap.h"
#include "vmEntry.h"
const int MAX_CALLTRACES = 65536;
const int MAX_STACK_FRAMES = 2048;
const char FULL_VERSION_STRING[] =
"Async-profiler " PROFILER_VERSION " built on " __DATE__ "\n"
"Copyright 2016-2021 Andrei Pangin\n";
const int MAX_NATIVE_FRAMES = 128;
const int RESERVED_FRAMES = 4;
const int MAX_NATIVE_LIBS = 2048;
const int CONCURRENCY_LEVEL = 16;
static inline int cmp64(u64 a, u64 b) {
return a > b ? 1 : a == b ? 0 : -1;
}
class CallTraceSample {
private:
u64 _samples;
u64 _counter;
int _start_frame; // Offset in frame buffer
int _num_frames;
public:
static int comparator(const void* s1, const void* s2) {
return cmp64(((CallTraceSample*)s2)->_counter, ((CallTraceSample*)s1)->_counter);
}
friend class Profiler;
};
class MethodSample {
private:
u64 _samples;
u64 _counter;
ASGCT_CallFrame _method;
public:
static int comparator(const void* s1, const void* s2) {
return cmp64(((MethodSample*)s2)->_counter, ((MethodSample*)s1)->_counter);
}
friend class Profiler;
enum AddressType {
ADDR_UNKNOWN,
ADDR_JIT,
ADDR_STUB,
ADDR_NATIVE
};
class MutexLocker {
private:
pthread_mutex_t* _mutex;
public:
MutexLocker(pthread_mutex_t& mutex) : _mutex(&mutex) {
pthread_mutex_lock(_mutex);
}
~MutexLocker() {
pthread_mutex_unlock(_mutex);
}
union CallTraceBuffer {
ASGCT_CallFrame _asgct_frames[1];
jvmtiFrameInfo _jvmti_frames[1];
};
class FrameName;
enum State {
IDLE,
RUNNING,
@@ -93,103 +69,136 @@ enum State {
class Profiler {
private:
// See hotspot/src/share/vm/prims/forte.cpp
enum {
ticks_no_Java_frame = 0,
ticks_no_class_load = -1,
ticks_GC_active = -2,
ticks_unknown_not_Java = -3,
ticks_not_walkable_not_Java = -4,
ticks_unknown_Java = -5,
ticks_not_walkable_Java = -6,
ticks_unknown_state = -7,
ticks_thread_exit = -8,
ticks_deopt = -9,
ticks_safepoint = -10,
ticks_skipped = -11,
FAILURE_TYPES = 12
};
pthread_mutex_t _state_lock;
Mutex _state_lock;
State _state;
Trap _begin_trap;
Trap _end_trap;
Mutex _thread_names_lock;
// TODO: single map?
std::map<int, std::string> _thread_names;
std::map<int, jlong> _thread_ids;
Dictionary _class_map;
Dictionary _symbol_map;
ThreadFilter _thread_filter;
CallTraceStorage _call_trace_storage;
FlightRecorder _jfr;
Engine* _engine;
int _events;
time_t _start_time;
u64 _total_samples;
u64 _total_counter;
u64 _failures[FAILURE_TYPES];
u64 _hashes[MAX_CALLTRACES];
CallTraceSample _traces[MAX_CALLTRACES];
MethodSample _methods[MAX_CALLTRACES];
u64 _failures[ASGCT_FAILURE_TYPES];
SpinLock _locks[CONCURRENCY_LEVEL];
ASGCT_CallFrame _calltrace_buffer[CONCURRENCY_LEVEL][MAX_STACK_FRAMES];
ASGCT_CallFrame* _frame_buffer;
int _frame_buffer_size;
volatile int _frame_buffer_index;
bool _frame_buffer_overflow;
bool _threads;
CallTraceBuffer* _calltrace_buffer[CONCURRENCY_LEVEL];
int _max_stack_depth;
int _safe_mode;
CStack _cstack;
bool _add_thread_frame;
bool _update_thread_names;
volatile bool _thread_events_state;
SpinLock _jit_lock;
const void* _jit_min_address;
const void* _jit_max_address;
SpinLock _stubs_lock;
CodeCache _java_methods;
NativeCodeCache _runtime_stubs;
NativeCodeCache* _native_libs[MAX_NATIVE_LIBS];
int _native_lib_count;
volatile int _native_lib_count;
// Support for intercepting NativeLibrary.load() / NativeLibraries.load()
JNINativeMethod _load_method;
void* _original_NativeLibrary_load;
void* _trapped_NativeLibrary_load;
static jboolean JNICALL NativeLibraryLoadTrap(JNIEnv* env, jobject self, jstring name, jboolean builtin);
static jboolean JNICALL NativeLibrariesLoadTrap(JNIEnv* env, jobject self, jobject lib, jstring name, jboolean builtin, jboolean jni);
void bindNativeLibraryLoad(JNIEnv* env, bool enable);
// Support for intercepting Thread.setNativeName()
void* _original_Thread_setNativeName;
static void JNICALL ThreadSetNativeNameTrap(JNIEnv* env, jobject self, jstring name);
void bindThreadSetNativeName(JNIEnv* env, bool enable);
void switchNativeMethodTraps(bool enable);
Error installTraps(const char* begin, const char* end);
void uninstallTraps();
static void trapHandler(int signo, siginfo_t* siginfo, void* ucontext);
void trapHandlerImpl(void* ucontext);
void addJavaMethod(const void* address, int length, jmethodID method);
void removeJavaMethod(const void* address, jmethodID method);
void addRuntimeStub(const void* address, int length, const char* name);
void updateJitRange(const void* min_address, const void* max_address);
const char* findNativeMethod(const void* address);
int getNativeTrace(int tid, ASGCT_CallFrame* frames);
void onThreadStart(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread);
void onThreadEnd(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread);
const char* asgctError(int code);
u32 getLockIndex(int tid);
int getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, int tid);
int getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth);
int makeEventFrame(ASGCT_CallFrame* frames, jint event_type, jmethodID event);
int getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int max_depth);
int makeEventFrame(ASGCT_CallFrame* frames, jint event_type, uintptr_t id);
bool fillTopFrame(const void* pc, ASGCT_CallFrame* frame);
bool addressInCode(const void* pc);
u64 hashCallTrace(int num_frames, ASGCT_CallFrame* frames);
void storeCallTrace(int num_frames, ASGCT_CallFrame* frames, u64 counter);
void copyToFrameBuffer(int num_frames, ASGCT_CallFrame* frames, CallTraceSample* trace);
u64 hashMethod(jmethodID method);
void storeMethod(jmethodID method, jint bci, u64 counter);
void initStateLock();
void resetSymbols();
void setSignalHandler();
AddressType getAddressType(instruction_t* pc);
void setThreadInfo(int tid, const char* name, jlong java_thread_id);
void updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread);
void updateJavaThreadNames();
void updateNativeThreadNames();
bool excludeTrace(FrameName* fn, CallTrace* trace);
void mangle(const char* name, char* buf, size_t size);
Engine* selectEngine(const char* event_name);
Error checkJvmCapabilities();
public:
static Profiler _instance;
Profiler() :
_state(IDLE),
_frame_buffer(NULL),
_begin_trap(),
_end_trap(),
_thread_filter(),
_call_trace_storage(),
_jfr(),
_start_time(0),
_max_stack_depth(0),
_safe_mode(0),
_thread_events_state(JVMTI_DISABLE),
_jit_lock(),
_jit_min_address((const void*)-1),
_jit_max_address((const void*)0),
_stubs_lock(),
_java_methods(),
_runtime_stubs("[stubs]"),
_native_lib_count(0) {
initStateLock();
_native_lib_count(0),
_original_NativeLibrary_load(NULL) {
for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
_calltrace_buffer[i] = NULL;
}
}
u64 total_samples() { return _total_samples; }
u64 total_counter() { return _total_counter; }
time_t uptime() { return time(NULL) - _start_time; }
Dictionary* classMap() { return &_class_map; }
ThreadFilter* threadFilter() { return &_thread_filter; }
void run(Arguments& args);
void runInternal(Arguments& args, std::ostream& out);
void shutdown(Arguments& args);
Error start(const char* event, long interval, int frame_buffer_size, bool threads);
Error check(Arguments& args);
Error start(Arguments& args, bool reset);
Error stop();
void dumpSummary(std::ostream& out);
void switchThreadEvents(jvmtiEventMode mode);
void dump(std::ostream& out, Arguments& args);
void dumpCollapsed(std::ostream& out, Arguments& args);
void dumpFlameGraph(std::ostream& out, Arguments& args);
void dumpTraces(std::ostream& out, int max_traces);
void dumpFlat(std::ostream& out, int max_methods);
void recordSample(void* ucontext, u64 counter, jint event_type, jmethodID event);
NativeCodeCache* jvmLibrary();
void dumpFlameGraph(std::ostream& out, Arguments& args, bool tree);
void dumpFlat(std::ostream& out, Arguments& args);
ThreadState getThreadState(void* ucontext);
void recordSample(void* ucontext, u64 counter, jint event_type, Event* event);
void updateSymbols(bool kernel_symbols);
const void* resolveSymbol(const char* name);
NativeCodeCache* findNativeLibrary(const void* address);
const char* findNativeMethod(const void* address);
// CompiledMethodLoad is also needed to enable DebugNonSafepoints info by default
static void JNICALL CompiledMethodLoad(jvmtiEnv* jvmti, jmethodID method,
@@ -208,6 +217,16 @@ class Profiler {
const void* address, jint length) {
_instance.addRuntimeStub(address, length, name);
}
static void JNICALL ThreadStart(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
_instance.onThreadStart(jvmti, jni, thread);
}
static void JNICALL ThreadEnd(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
_instance.onThreadEnd(jvmti, jni, thread);
}
friend class Recording;
};
#endif // _PROFILER_H

2
src/spinLock.h Executable file → Normal file
View File

@@ -30,7 +30,7 @@ class SpinLock {
volatile int _lock;
public:
SpinLock() : _lock(0) {
SpinLock(int initial_state = 0) : _lock(initial_state) {
}
void reset() {

31
src/stackFrame.h Executable file → Normal file
View File

@@ -19,14 +19,17 @@
#include <stdint.h>
#include <ucontext.h>
#include "arch.h"
class StackFrame {
private:
ucontext_t* _ucontext;
uintptr_t stackAt(int slot) {
return ((uintptr_t*)sp())[slot];
static bool withinCurrentStack(uintptr_t address) {
// Check that the address is not too far from the stack pointer of current context
void* real_sp;
return address - (uintptr_t)&real_sp <= 0xffff;
}
public:
@@ -40,10 +43,19 @@ class StackFrame {
fp() = saved_fp;
}
bool validSP() {
return withinCurrentStack(sp());
}
uintptr_t stackAt(int slot) {
return ((uintptr_t*)sp())[slot];
}
uintptr_t& pc();
uintptr_t& sp();
uintptr_t& fp();
uintptr_t retval();
uintptr_t arg0();
uintptr_t arg1();
uintptr_t arg2();
@@ -51,7 +63,20 @@ class StackFrame {
void ret();
bool pop();
bool pop(bool trust_frame_pointer);
bool checkInterruptedSyscall();
// Look that many stack slots for a return address candidate.
// 0 = do not use stack snooping heuristics.
static int callerLookupSlots();
// Check if PC looks like a valid return address (i.e. the previous instruction is a CALL).
// It's safe to return false to skip return address heuristics.
static bool isReturnAddress(instruction_t* pc);
// Check if PC points to a syscall instruction
static bool isSyscall(instruction_t* pc);
};
#endif // _STACKFRAME_H

24
src/stackFrame_aarch64.cpp Executable file → Normal file
View File

@@ -19,6 +19,7 @@
#if defined(__aarch64__)
#include <errno.h>
#include "stackFrame.h"
@@ -37,6 +38,10 @@ uintptr_t& StackFrame::fp() {
return (uintptr_t&)_ucontext->uc_mcontext.regs[REG_FP];
}
uintptr_t StackFrame::retval() {
return (uintptr_t)_ucontext->uc_mcontext.regs[0];
}
uintptr_t StackFrame::arg0() {
return (uintptr_t)_ucontext->uc_mcontext.regs[0];
}
@@ -57,7 +62,7 @@ void StackFrame::ret() {
_ucontext->uc_mcontext.pc = _ucontext->uc_mcontext.regs[REG_LR];
}
bool StackFrame::pop() {
bool StackFrame::pop(bool trust_frame_pointer) {
if (fp() == sp()) {
// Expected frame layout:
// sp 000000nnnnnnnnnn [stack]
@@ -72,4 +77,21 @@ bool StackFrame::pop() {
return true;
}
// Tells whether the return-value register of the saved context holds -EINTR.
bool StackFrame::checkInterruptedSyscall() {
    const uintptr_t interrupted = (uintptr_t)-EINTR;
    return retval() == interrupted;
}
// Number of stack slots to scan for a return address candidate.
// Zero means the stack snooping heuristics are not used on aarch64.
int StackFrame::callerLookupSlots() {
    const int slots_to_inspect = 0;
    return slots_to_inspect;
}
// No return address heuristics on aarch64: every PC is reported
// as "not a return address".
bool StackFrame::isReturnAddress(instruction_t* pc) {
    const bool looks_like_return_address = false;
    return looks_like_return_address;
}
// Reports whether the instruction PC points to is "svc #0".
bool StackFrame::isSyscall(instruction_t* pc) {
    const instruction_t current = *pc;
    return current == 0xd4000001;  // svc #0
}
#endif // defined(__aarch64__)

25
src/stackFrame_arm.cpp Executable file → Normal file
View File

@@ -16,6 +16,7 @@
#if defined(__arm__) || defined(__thumb__)
#include <errno.h>
#include "stackFrame.h"
@@ -31,6 +32,10 @@ uintptr_t& StackFrame::fp() {
return (uintptr_t&)_ucontext->uc_mcontext.arm_fp;
}
uintptr_t StackFrame::retval() {
return (uintptr_t)_ucontext->uc_mcontext.arm_r0;
}
uintptr_t StackFrame::arg0() {
return (uintptr_t)_ucontext->uc_mcontext.arm_r0;
}
@@ -51,9 +56,25 @@ void StackFrame::ret() {
_ucontext->uc_mcontext.arm_pc = _ucontext->uc_mcontext.arm_lr;
}
bool StackFrame::pop() {
bool StackFrame::pop(bool trust_frame_pointer) {
return false;
}
// Tells whether the return-value register of the saved context holds -EINTR.
bool StackFrame::checkInterruptedSyscall() {
    const uintptr_t interrupted = (uintptr_t)-EINTR;
    return retval() == interrupted;
}
// Number of stack slots to scan for a return address candidate.
// Zero means the stack snooping heuristics are not used on ARM.
int StackFrame::callerLookupSlots() {
    const int slots_to_inspect = 0;
    return slots_to_inspect;
}
// No return address heuristics on ARM: every PC is reported
// as "not a return address".
bool StackFrame::isReturnAddress(instruction_t* pc) {
    const bool looks_like_return_address = false;
    return looks_like_return_address;
}
// Reports whether the instruction PC points to is "swi #0".
bool StackFrame::isSyscall(instruction_t* pc) {
    const instruction_t current = *pc;
    return current == 0xef000000;  // swi #0
}
#endif // defined(__arm__) || defined(__thumb__)

55
src/stackFrame_i386.cpp Executable file → Normal file
View File

@@ -16,6 +16,7 @@
#ifdef __i386__
#include <errno.h>
#include "stackFrame.h"
@@ -31,6 +32,10 @@ uintptr_t& StackFrame::fp() {
return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EBP];
}
uintptr_t StackFrame::retval() {
return (uintptr_t)_ucontext->uc_mcontext.gregs[REG_EAX];
}
uintptr_t StackFrame::arg0() {
return stackAt(1);
}
@@ -52,27 +57,43 @@ void StackFrame::ret() {
sp() += 4;
}
static inline bool withinCurrentStack(uintptr_t value) {
// Check that value is not too far from stack pointer of current context
void* real_sp;
return value - (uintptr_t)&real_sp <= 0xffff;
}
bool StackFrame::pop() {
if (!withinCurrentStack(sp())) {
return false;
}
if (fp() == sp() || withinCurrentStack(stackAt(0))) {
bool StackFrame::pop(bool trust_frame_pointer) {
if (trust_frame_pointer && withinCurrentStack(fp())) {
sp() = fp() + 8;
fp() = stackAt(-2);
pc() = stackAt(-1);
return true;
} else if (fp() == sp() || withinCurrentStack(stackAt(0))) {
fp() = stackAt(0);
pc() = stackAt(1);
sp() += 8;
} else {
pc() = stackAt(0);
sp() += 4;
return true;
}
return true;
return false;
}
// Tells whether the return-value register of the saved context holds -EINTR.
bool StackFrame::checkInterruptedSyscall() {
    const uintptr_t interrupted = (uintptr_t)-EINTR;
    return retval() == interrupted;
}
// Number of stack slots to scan for a return address candidate.
int StackFrame::callerLookupSlots() {
    const int slots_to_inspect = 7;
    return slots_to_inspect;
}
// Heuristic: PC is treated as a return address when the bytes right before it
// match one of the recognized CALL encodings. Bytes are examined lazily,
// in the order pc[-5], pc[-2], pc[-1].
bool StackFrame::isReturnAddress(instruction_t* pc) {
    // call rel32
    if (pc[-5] == 0xe8) {
        return true;
    }

    // call reg or call [reg]
    if (pc[-2] != 0xff) {
        return false;
    }
    const int mode = pc[-1] & 0xf0;
    return mode == 0xd0 || mode == 0x10;
}
// Reports whether PC points to the two-byte instruction "int 0x80".
bool StackFrame::isSyscall(instruction_t* pc) {
    if (pc[0] != 0xcd) {
        return false;
    }
    return pc[1] == 0x80;
}
#endif // __i386__

107
src/stackFrame_x64.cpp Executable file → Normal file
View File

@@ -16,6 +16,8 @@
#ifdef __x86_64__
#include <errno.h>
#include <sys/syscall.h>
#include "stackFrame.h"
@@ -38,6 +40,10 @@ uintptr_t& StackFrame::fp() {
return (uintptr_t&)REG(REG_RBP, __rbp);
}
uintptr_t StackFrame::retval() {
return (uintptr_t)REG(REG_RAX, __rax);
}
uintptr_t StackFrame::arg0() {
return (uintptr_t)REG(REG_RDI, __rdi);
}
@@ -60,44 +66,91 @@ void StackFrame::ret() {
}
static inline bool withinCurrentStack(uintptr_t value) {
// Check that value is not too far from stack pointer of current context
void* real_sp;
return value - (uintptr_t)&real_sp <= 0xffff;
}
static inline bool isFramePrologueEpilogue(uintptr_t pc) {
unsigned int opcode = *(unsigned int*)(pc - 1);
if (opcode == 0xec834855) {
// push rbp
// sub rsp, $const
return true;
} else if (opcode == 0xec8b4855) {
// push rbp
// mov rbp, rsp
return true;
} else if ((opcode & 0xffffff00) == 0x05855d00) {
if (pc & 0xfff) {
// Make sure we are not at the page boundary, so that reading [pc - 1] is safe
unsigned int opcode = *(unsigned int*)(pc - 1);
if (opcode == 0xec834855) {
// push rbp
// sub rsp, $const
return true;
} else if (opcode == 0xec8b4855) {
// push rbp
// mov rbp, rsp
return true;
}
}
if (*(unsigned char*)pc == 0x5d && *(unsigned short*)(pc + 1) == 0x0585) {
// pop rbp
// test [polling_page], eax
return true;
}
return false;
}
// Unwinds one frame by rewriting sp, fp and pc of the saved context.
// Returns true if the registers were updated, false if neither of the
// recognized frame layouts applies (the context is then left unchanged).
bool StackFrame::pop(bool trust_frame_pointer) {
if (trust_frame_pointer && withinCurrentStack(fp())) {
// The caller vouches for fp: move sp just above the two words fp points to.
// The statement order matters - stackAt() indexes relative to the NEW sp,
// so slots -2 and -1 are the words at the old fp and old fp + 8.
sp() = fp() + 16;
fp() = stackAt(-2);
pc() = stackAt(-1);
return true;
} else if (fp() == sp() || withinCurrentStack(stackAt(0)) || isFramePrologueEpilogue(pc())) {
// Top of the stack is taken as [saved fp][return address]:
// read both before dropping the two slots
fp() = stackAt(0);
pc() = stackAt(1);
sp() += 16;
return true;
}
return false;
}
bool StackFrame::pop() {
if (!withinCurrentStack(sp())) {
return false;
bool StackFrame::checkInterruptedSyscall() {
#ifdef __APPLE__
// We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap
if (*(instruction_t*)pc() == 0xc3) {
return true;
}
if (fp() == sp() || withinCurrentStack(stackAt(0)) || isFramePrologueEpilogue(pc())) {
fp() = stackAt(0);
pc() = stackAt(1);
sp() += 16;
// If CF is set, the error code is in low byte of eax,
// some other syscalls (ulock_wait) do not set CF when interrupted
if (REG(REG_EFL, __rflags) & 1) {
return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT;
} else {
pc() = stackAt(0);
sp() += 8;
return retval() == (uintptr_t)-EINTR;
}
return true;
#else
if (retval() == (uintptr_t)-EINTR) {
// Workaround for JDK-8237858: restart the interrupted poll() manually.
// Check if the previous instruction is mov eax, SYS_poll with infinite timeout
if (arg2() == (uintptr_t)-1) {
uintptr_t pc = this->pc();
if ((pc & 0xfff) >= 7 && *(unsigned char*)(pc - 7) == 0xb8 && *(int*)(pc - 6) == SYS_poll) {
this->pc() = pc - 7;
}
}
return true;
}
return false;
#endif
}
// Number of stack slots to scan for a return address candidate.
int StackFrame::callerLookupSlots() {
    const int slots_to_inspect = 7;
    return slots_to_inspect;
}
// Heuristic: PC is treated as a return address when the bytes right before it
// match one of the recognized CALL encodings. Bytes are examined lazily,
// in the order pc[-5], pc[-2], pc[-1].
bool StackFrame::isReturnAddress(instruction_t* pc) {
    // call rel32
    if (pc[-5] == 0xe8) {
        return true;
    }

    // call reg or call [reg]
    if (pc[-2] != 0xff) {
        return false;
    }
    const int mode = pc[-1] & 0xf0;
    return mode == 0xd0 || mode == 0x10;
}
// Reports whether PC points to the two-byte sequence 0f 05 (syscall).
bool StackFrame::isSyscall(instruction_t* pc) {
    if (pc[0] != 0x0f) {
        return false;
    }
    return pc[1] == 0x05;
}
#endif // __x86_64__

13
src/symbols.h Executable file → Normal file
View File

@@ -17,15 +17,24 @@
#ifndef _SYMBOLS_H
#define _SYMBOLS_H
#include <set>
#include "codeCache.h"
#include "mutex.h"
class Symbols {
private:
static void parseKernelSymbols(NativeCodeCache* cc);
static Mutex _parse_lock;
static std::set<const void*> _parsed_libraries;
static bool _have_kernel_symbols;
public:
static int parseMaps(NativeCodeCache** array, int size);
static void parseKernelSymbols(NativeCodeCache* cc);
static void parseLibraries(NativeCodeCache** array, volatile int& count, int size, bool kernel_symbols);
static bool haveKernelSymbols() {
return _have_kernel_symbols;
}
};
#endif // _SYMBOLS_H

112
src/symbols_linux.cpp Executable file → Normal file
View File

@@ -16,12 +16,14 @@
#ifdef __linux__
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <elf.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/limits.h>
@@ -29,6 +31,7 @@
#include <iostream>
#include <string>
#include "symbols.h"
#include "arch.h"
class SymbolDesc {
@@ -87,12 +90,16 @@ typedef Elf64_Ehdr ElfHeader;
typedef Elf64_Shdr ElfSection;
typedef Elf64_Nhdr ElfNote;
typedef Elf64_Sym ElfSymbol;
typedef Elf64_Rel ElfRelocation;
#define ELF_R_SYM ELF64_R_SYM
#else
const unsigned char ELFCLASS_SUPPORTED = ELFCLASS32;
typedef Elf32_Ehdr ElfHeader;
typedef Elf32_Shdr ElfSection;
typedef Elf32_Nhdr ElfNote;
typedef Elf32_Sym ElfSymbol;
typedef Elf32_Rel ElfRelocation;
#define ELF_R_SYM ELF32_R_SYM
#endif // __LP64__
@@ -133,10 +140,11 @@ class ElfParser {
bool loadSymbolsUsingBuildId();
bool loadSymbolsUsingDebugLink();
void loadSymbolTable(ElfSection* symtab);
void addRelocationSymbols(ElfSection* reltab, const char* plt);
public:
static bool parseFile(NativeCodeCache* cc, const char* base, const char* file_name, bool use_debug);
static void parseMem(NativeCodeCache* cc, const char* base, const void* addr);
static void parseMem(NativeCodeCache* cc, const char* base);
};
@@ -165,7 +173,14 @@ bool ElfParser::parseFile(NativeCodeCache* cc, const char* base, const char* fil
void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (addr != NULL) {
if (addr == MAP_FAILED) {
if (strcmp(file_name, "/") == 0) {
// https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1843018
fprintf(stderr, "Could not parse symbols due to the OS bug\n");
} else {
fprintf(stderr, "Could not parse symbols from %s: %s\n", file_name, strerror(errno));
}
} else {
ElfParser elf(cc, base, addr, file_name);
elf.loadSymbols(use_debug);
munmap(addr, length);
@@ -173,8 +188,8 @@ bool ElfParser::parseFile(NativeCodeCache* cc, const char* base, const char* fil
return true;
}
void ElfParser::parseMem(NativeCodeCache* cc, const char* base, const void* addr) {
ElfParser elf(cc, base, addr);
void ElfParser::parseMem(NativeCodeCache* cc, const char* base) {
ElfParser elf(cc, base, base);
elf.loadSymbols(false);
}
@@ -187,13 +202,13 @@ void ElfParser::loadSymbols(bool use_debug) {
ElfSection* section = findSection(SHT_SYMTAB, ".symtab");
if (section != NULL) {
loadSymbolTable(section);
return;
goto loaded;
}
// Try to load symbols from an external debuginfo library
if (use_debug) {
if (loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink()) {
return;
goto loaded;
}
}
@@ -202,6 +217,19 @@ void ElfParser::loadSymbols(bool use_debug) {
if (section != NULL) {
loadSymbolTable(section);
}
loaded:
// Synthesize names for PLT stubs
if (use_debug) {
ElfSection* plt = findSection(SHT_PROGBITS, ".plt");
ElfSection* reltab = findSection(SHT_RELA, ".rela.plt");
if (reltab == NULL) {
reltab = findSection(SHT_REL, ".rel.plt");
}
if (plt != NULL && reltab != NULL) {
addRelocationSymbols(reltab, _base + plt->sh_offset + PLT_HEADER_SIZE);
}
}
}
// Load symbols from /usr/lib/debug/.build-id/ab/cdef1234.debug, where abcdef1234 is Build ID
@@ -252,17 +280,17 @@ bool ElfParser::loadSymbolsUsingDebugLink() {
// 1. /path/to/libjvm.so.debug
if (strcmp(debuglink, basename + 1) != 0 &&
snprintf(path, sizeof(path), "%s/%s", dirname, debuglink) < sizeof(path)) {
snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) {
result = parseFile(_cc, _base, path, false);
}
// 2. /path/to/.debug/libjvm.so.debug
if (!result && snprintf(path, sizeof(path), "%s/.debug/%s", dirname, debuglink) < sizeof(path)) {
if (!result && snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) {
result = parseFile(_cc, _base, path, false);
}
// 3. /usr/lib/debug/path/to/libjvm.so.debug
if (!result && snprintf(path, sizeof(path), "/usr/lib/debug%s/%s", dirname, debuglink) < sizeof(path)) {
if (!result && snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < PATH_MAX) {
result = parseFile(_cc, _base, path, false);
}
@@ -284,6 +312,37 @@ void ElfParser::loadSymbolTable(ElfSection* symtab) {
}
}
// Registers a synthetic symbol for every entry of a PLT relocation table.
//   reltab - relocation section; its sh_link selects the symbol table,
//            whose sh_link in turn selects the string table
//   plt    - address of the first PLT stub; consecutive relocations are mapped
//            to consecutive stubs of PLT_ENTRY_SIZE bytes each
// The generated name is "<symbol>.plt" for symbols starting with "_Z",
// "<symbol>@plt" for other named symbols, and "@plt" for unnamed ones.
void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) {
    // With a zero entry size the cursor below would never advance,
    // and the loop would not terminate on a malformed section header
    if (reltab->sh_entsize == 0) {
        return;
    }

    ElfSection* symtab = section(reltab->sh_link);
    const char* symbols = at(symtab);

    ElfSection* strtab = section(symtab->sh_link);
    const char* strings = at(strtab);

    const char* relocations = at(reltab);
    const char* relocations_end = relocations + reltab->sh_size;
    // Step by the section's own entry size; only r_info of each record is read
    for (; relocations < relocations_end; relocations += reltab->sh_entsize) {
        ElfRelocation* r = (ElfRelocation*)relocations;
        ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize);

        char name[256];
        if (sym->st_name == 0) {
            strcpy(name, "@plt");
        } else {
            const char* sym_name = strings + sym->st_name;
            snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' : '@');
            name[sizeof(name) - 1] = 0;
        }

        _cc->add(plt, PLT_ENTRY_SIZE, name);
        plt += PLT_ENTRY_SIZE;
    }
}
Mutex Symbols::_parse_lock;
std::set<const void*> Symbols::_parsed_libraries;
bool Symbols::_have_kernel_symbols = false;
void Symbols::parseKernelSymbols(NativeCodeCache* cc) {
std::ifstream maps("/proc/kallsyms");
@@ -297,18 +356,26 @@ void Symbols::parseKernelSymbols(NativeCodeCache* cc) {
const char* addr = symbol.addr();
if (addr != NULL) {
cc->add(addr, 0, symbol.name());
_have_kernel_symbols = true;
}
}
}
}
int Symbols::parseMaps(NativeCodeCache** array, int size) {
int count = 0;
if (count < size) {
void Symbols::parseLibraries(NativeCodeCache** array, volatile int& count, int size, bool kernel_symbols) {
MutexLocker ml(_parse_lock);
if (kernel_symbols && !haveKernelSymbols()) {
NativeCodeCache* cc = new NativeCodeCache("[kernel]");
parseKernelSymbols(cc);
cc->sort();
array[count++] = cc;
if (haveKernelSymbols()) {
cc->sort();
array[count] = cc;
atomicInc(count);
} else {
delete cc;
}
}
std::ifstream maps("/proc/self/maps");
@@ -317,21 +384,24 @@ int Symbols::parseMaps(NativeCodeCache** array, int size) {
while (count < size && std::getline(maps, str)) {
MemoryMapDesc map(str.c_str());
if (map.isExecutable() && map.file() != NULL && map.file()[0] != 0) {
NativeCodeCache* cc = new NativeCodeCache(map.file(), map.addr(), map.end());
const char* base = map.addr() - map.offs();
const char* image_base = map.addr();
if (!_parsed_libraries.insert(image_base).second) {
continue; // the library was already parsed
}
NativeCodeCache* cc = new NativeCodeCache(map.file(), image_base, map.end());
if (map.inode() != 0) {
ElfParser::parseFile(cc, base, map.file(), true);
ElfParser::parseFile(cc, image_base - map.offs(), map.file(), true);
} else if (strcmp(map.file(), "[vdso]") == 0) {
ElfParser::parseMem(cc, base, base);
ElfParser::parseMem(cc, image_base);
}
cc->sort();
array[count++] = cc;
array[count] = cc;
atomicInc(count);
}
}
return count;
}
#endif // __linux__

134
src/symbols_macos.cpp Executable file → Normal file
View File

@@ -16,58 +16,97 @@
#ifdef __APPLE__
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/fat.h>
#include <mach-o/nlist.h>
#include "symbols.h"
#include "arch.h"
class MachOParser {
private:
NativeCodeCache* _cc;
const char* _base;
const char* _header;
const mach_header* _image_base;
load_command* findCommand(uint32_t command) {
mach_header_64* header = (mach_header_64*)_header;
load_command* result = (load_command*)(_header + sizeof(mach_header_64));
for (uint32_t i = 0; i < header->ncmds; i++) {
if (result->cmd == command) {
return result;
}
result = (load_command*)((uintptr_t)result + result->cmdsize);
}
return NULL;
static const char* add(const void* base, uint32_t offset) {
return (const char*)base + offset;
}
void loadSymbols() {
symtab_command* symtab = (symtab_command*)findCommand(LC_SYMTAB);
if (symtab == NULL) {
return;
}
nlist_64* symbol_table = (nlist_64*)(_header + symtab->symoff);
const char* str_table = _header + symtab->stroff;
void loadSymbols(mach_header_64* header, symtab_command* symtab) {
nlist_64* symbol_table = (nlist_64*)add(header, symtab->symoff);
const char* str_table = add(header, symtab->stroff);
for (uint32_t i = 0; i < symtab->nsyms; i++) {
nlist_64 sym = symbol_table[i];
if ((sym.n_type & 0xee) == 0x0e && sym.n_value != 0) {
_cc->add(_base + sym.n_value, 0, str_table + sym.n_un.n_strx + 1);
const char* addr = add(_image_base, sym.n_value);
const char* name = str_table + sym.n_un.n_strx;
if (name[0] == '_') name++;
_cc->add(addr, 0, name);
}
}
}
public:
MachOParser(NativeCodeCache* cc, const char* base, const char* header) : _cc(cc), _base(base), _header(header) {
void parseMachO(mach_header_64* header) {
load_command* lc = (load_command*)(header + 1);
for (uint32_t i = 0; i < header->ncmds; i++) {
if (lc->cmd == LC_SYMTAB) {
loadSymbols(header, (symtab_command*)lc);
break;
}
lc = (load_command*)add(lc, lc->cmdsize);
}
}
static void parseFile(NativeCodeCache* cc, const char* base, const char* file_name) {
void parseFatObject(fat_header* header) {
int narch = header->nfat_arch;
fat_arch* arch = (fat_arch*)(header + 1);
for (uint32_t i = 0; i < narch; i++) {
if (arch[i].cputype == _image_base->cputype &&
arch[i].cpusubtype == _image_base->cpusubtype) {
parseMachO((mach_header_64*)add(header, arch[i].offset));
}
}
}
// The same as parseFatObject, but fields are big-endian
void parseFatObjectBE(fat_header* header) {
int narch = htonl(header->nfat_arch);
fat_arch* arch = (fat_arch*)(header + 1);
for (uint32_t i = 0; i < narch; i++) {
if (htonl(arch[i].cputype) == _image_base->cputype &&
htonl(arch[i].cpusubtype) == _image_base->cpusubtype) {
parseMachO((mach_header_64*)add(header, htonl(arch[i].offset)));
}
}
}
void parse(mach_header* header) {
uint32_t magic = header->magic;
if (magic == MH_MAGIC_64) {
parseMachO((mach_header_64*)header);
} else if (magic == FAT_MAGIC) {
parseFatObject((fat_header*)header);
} else if (magic == FAT_CIGAM) {
parseFatObjectBE((fat_header*)header);
}
}
public:
MachOParser(NativeCodeCache* cc, const mach_header* image_base) : _cc(cc), _image_base(image_base) {
}
static void parseFile(NativeCodeCache* cc, const mach_header* image_base, const char* file_name) {
int fd = open(file_name, O_RDONLY);
if (fd == -1) {
return;
@@ -77,38 +116,43 @@ class MachOParser {
void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (addr != NULL) {
MachOParser parser(cc, base, (const char*)addr);
parser.loadSymbols();
if (addr == MAP_FAILED) {
fprintf(stderr, "Could not parse symbols from %s: %s\n", file_name, strerror(errno));
} else {
MachOParser parser(cc, image_base);
parser.parse((mach_header*)addr);
munmap(addr, length);
}
}
};
Mutex Symbols::_parse_lock;
std::set<const void*> Symbols::_parsed_libraries;
bool Symbols::_have_kernel_symbols = false;
void Symbols::parseKernelSymbols(NativeCodeCache* cc) {
}
int Symbols::parseMaps(NativeCodeCache** array, int size) {
int count = 0;
void Symbols::parseLibraries(NativeCodeCache** array, volatile int& count, int size, bool kernel_symbols) {
MutexLocker ml(_parse_lock);
uint32_t images = _dyld_image_count();
for (uint32_t i = 0; i < images && count < size; i++) {
const char* path = _dyld_get_image_name(i);
const char* base = (const char*)_dyld_get_image_vmaddr_slide(i);
// For now load only libjvm symbols. As soon as native stack traces
// are supported on macOS, we'll take care about other native libraries
size_t length = strlen(path);
if (length >= 12 && strcmp(path + length - 12, "libjvm.dylib") == 0) {
NativeCodeCache* cc = new NativeCodeCache(path);
MachOParser::parseFile(cc, base, path);
cc->sort();
array[count++] = cc;
const mach_header* image_base = _dyld_get_image_header(i);
if (!_parsed_libraries.insert(image_base).second) {
continue; // the library was already parsed
}
}
return count;
const char* path = _dyld_get_image_name(i);
NativeCodeCache* cc = new NativeCodeCache(path);
MachOParser::parseFile(cc, image_base, path);
cc->sort();
array[count] = cc;
atomicInc(count);
}
}
#endif // __APPLE__

127
src/threadFilter.cpp Normal file
View File

@@ -0,0 +1,127 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <string.h>
#include "threadFilter.h"
#include "os.h"
// Starts with a disabled, empty filter. Only the first bitmap is allocated
// up front; all other slots stay NULL.
ThreadFilter::ThreadFilter() : _enabled(false), _size(0) {
    memset(_bitmap, 0, sizeof(_bitmap));

    u32* first_bitmap = (u32*)OS::safeAlloc(BITMAP_SIZE);
    _bitmap[0] = first_bitmap;
}
// Releases every bitmap that has been allocated so far.
ThreadFilter::~ThreadFilter() {
    for (int index = 0; index < MAX_BITMAPS; index++) {
        u32* chunk = _bitmap[index];
        if (chunk == NULL) {
            continue;
        }
        OS::safeFree(chunk, BITMAP_SIZE);
    }
}
// Configures the filter from a textual list of thread IDs.
//   filter - NULL disables the filter; otherwise a list of items separated by
//            a single character, each item being either "ID" or "FROM-TO"
//            (inclusive range). Numbers are parsed by strtol with base 0.
// Parsing stops at the first item that is not a positive number fitting
// into int; the IDs accepted up to that point are kept and the filter
// is enabled.
void ThreadFilter::init(const char* filter) {
    if (filter == NULL) {
        _enabled = false;
        return;
    }

    char* end;
    do {
        // Keep the full strtol() result, so that an out-of-range number
        // is rejected instead of being silently truncated to another ID
        long first = strtol(filter, &end, 0);
        if (first <= 0 || first != (long)(int)first) {
            break;
        }

        if (*end == '-') {
            long last = strtol(end + 1, &end, 0);
            if (last != (long)(int)last) {
                break;
            }
            for (int id = (int)first; id <= (int)last; id++) {
                add(id);
                if (id == (int)last) {
                    // Leave before id++ can overflow when the range ends at INT_MAX
                    break;
                }
            }
        } else {
            add((int)first);
        }

        filter = end + 1;
    } while (*end);

    _enabled = true;
}
void ThreadFilter::clear() {
for (int i = 0; i < MAX_BITMAPS; i++) {
if (_bitmap[i] != NULL) {
memset(_bitmap[i], 0, BITMAP_SIZE);
}
}
_size = 0;
}
// Returns true if the bit of the given thread ID is set in the filter.
// Negative IDs are never accepted: converted to u32 they would select
// a slot beyond the end of the _bitmap array.
bool ThreadFilter::accept(int thread_id) {
    if (thread_id < 0) {
        return false;
    }

    u32* b = bitmap(thread_id);
    // Unsigned constant: shifting a signed 1 into bit 31 is not portable
    return b != NULL && (word(b, thread_id) & (1U << (thread_id & 0x1f)));
}
// Sets the bit of the given thread ID, allocating the containing bitmap
// on first use. _size is incremented only if the bit was not set before.
// Negative IDs are ignored: converted to u32 they would select a slot
// beyond the end of the _bitmap array.
void ThreadFilter::add(int thread_id) {
    if (thread_id < 0) {
        return;
    }

    u32* b = bitmap(thread_id);
    if (b == NULL) {
        // NOTE(review): the result of OS::safeAlloc is used unchecked - confirm its failure contract
        b = (u32*)OS::safeAlloc(BITMAP_SIZE);
        u32* oldb = __sync_val_compare_and_swap(&_bitmap[(u32)thread_id / BITMAP_CAPACITY], NULL, b);
        if (oldb != NULL) {
            // The slot has been filled in the meantime: discard our copy and use the installed one
            OS::safeFree(b, BITMAP_SIZE);
            b = oldb;
        }
    }

    // Unsigned constant: shifting a signed 1 into bit 31 is not portable
    u32 bit = 1U << (thread_id & 0x1f);
    if (!(__sync_fetch_and_or(&word(b, thread_id), bit) & bit)) {
        atomicInc(_size);
    }
}
// Clears the bit of the given thread ID. _size is decremented only if
// the bit was set before. Negative IDs are ignored: converted to u32
// they would select a slot beyond the end of the _bitmap array.
void ThreadFilter::remove(int thread_id) {
    if (thread_id < 0) {
        return;
    }

    u32* b = bitmap(thread_id);
    if (b == NULL) {
        return;
    }

    // Unsigned constant: shifting a signed 1 into bit 31 is not portable
    u32 bit = 1U << (thread_id & 0x1f);
    if (__sync_fetch_and_and(&word(b, thread_id), ~bit) & bit) {
        atomicInc(_size, -1);
    }
}
// Appends the IDs of all set bits to v, in ascending order.
void ThreadFilter::collect(std::vector<int>& v) {
    const int words_per_bitmap = BITMAP_SIZE / sizeof(u32);

    for (int i = 0; i < MAX_BITMAPS; i++) {
        u32* b = _bitmap[i];
        if (b == NULL) {
            continue;
        }

        int start_id = i * BITMAP_CAPACITY;
        for (int j = 0; j < words_per_bitmap; j++) {
            u32 word = b[j];
            if (word == 0) {
                continue;
            }
            for (int bit = 0; bit < 32; bit++) {
                // Unsigned constant: 1 << 31 overflows a signed int
                if (word & (1U << bit)) {
                    v.push_back(start_id + j * 32 + bit);
                }
            }
        }
    }
}

70
src/threadFilter.h Normal file
View File

@@ -0,0 +1,70 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _THREADFILTER_H
#define _THREADFILTER_H
#include <vector>
#include "arch.h"
// The size of thread ID bitmap in bytes. Must be at least 64K to allow mmap()
const u32 BITMAP_SIZE = 65536;
// How many thread IDs one bitmap can hold
const u32 BITMAP_CAPACITY = BITMAP_SIZE * 8;
// Total number of bitmaps required to hold the entire range of thread IDs.
// The shift is done on an unsigned constant: 1 << 31 overflows a signed int.
const u32 MAX_BITMAPS = (1U << 31) / BITMAP_CAPACITY;
// Query operations of ThreadFilter have to stay lock-free and signal-safe;
// updates are mostly lock-free too, apart from rare bitmap allocations
class ThreadFilter {
  private:
    u32* _bitmap[MAX_BITMAPS];
    bool _enabled;
    volatile int _size;

    // Bitmap that covers the given thread ID (NULL if the slot is empty)
    u32* bitmap(int thread_id) {
        u32 slot = (u32)thread_id / BITMAP_CAPACITY;
        return _bitmap[slot];
    }

    // 32-bit word of the bitmap that contains the bit of the given thread ID
    u32& word(u32* bitmap, int thread_id) {
        u32 bit_offset = (u32)thread_id % BITMAP_CAPACITY;
        return bitmap[bit_offset >> 5];
    }

  public:
    ThreadFilter();
    ~ThreadFilter();

    bool enabled() {
        return _enabled;
    }

    int size() {
        return _size;
    }

    void init(const char* filter);
    void clear();

    bool accept(int thread_id);
    void add(int thread_id);
    void remove(int thread_id);

    void collect(std::vector<int>& v);
};
#endif // _THREADFILTER_H

63
src/trap.cpp Normal file
View File

@@ -0,0 +1,63 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <unistd.h>
#include <sys/mman.h>
#include "trap.h"
// Points the trap at a new code address and remembers the instruction found there,
// so that install() / uninstall() can later patch and restore it.
// A NULL address detaches the trap.
// Returns false if the page containing the entry point could not be made writable;
// in this case the previously assigned entry is left untouched.
bool Trap::assign(const void* address) {
    uintptr_t entry = (uintptr_t)address;
    if (entry == 0) {
        _entry = 0;
        return true;
    }

#if defined(__arm__) || defined(__thumb__)
    // Bit 0 of a code address marks a Thumb function: strip it to get the real
    // instruction address. The breakpoint encoding is selected on every call,
    // so that a Trap previously assigned to a Thumb entry does not keep
    // BREAKPOINT_THUMB after being re-assigned to a non-Thumb entry.
    if (entry & 1) {
        entry ^= 1;
        _breakpoint_insn = BREAKPOINT_THUMB;
    } else {
        _breakpoint_insn = BREAKPOINT;
    }
#endif

    if (entry != _entry) {
        // Make the entry point writable, so we can rewrite instructions
        long page_size = sysconf(_SC_PAGESIZE);
        if (mprotect((void*)(entry & -page_size), page_size, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
            return false;
        }

        _entry = entry;
        // Keep the original instruction for uninstall()
        _saved_insn = *(instruction_t*)entry;
    }

    return true;
}
// Insert breakpoint at the very first instruction
void Trap::install() {
if (_entry) {
*(instruction_t*)_entry = _breakpoint_insn;
flushCache(_entry);
}
}
// Clear breakpoint - restore the original instruction
void Trap::uninstall() {
if (_entry) {
*(instruction_t*)_entry = _saved_insn;
flushCache(_entry);
}
}

48
src/trap.h Normal file
View File

@@ -0,0 +1,48 @@
/*
* Copyright 2020 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _TRAP_H
#define _TRAP_H
#include <stdint.h>
#include "arch.h"
// A breakpoint that can be placed on the first instruction of a function
class Trap {
  private:
    uintptr_t _entry;                // address of the patched instruction, 0 if unassigned
    instruction_t _breakpoint_insn;  // instruction written by install()
    instruction_t _saved_insn;       // original instruction, restored by uninstall()

  public:
    Trap() : _entry(0), _breakpoint_insn(BREAKPOINT) {
    }

    uintptr_t entry() {
        return _entry;
    }

    // Tells whether pc belongs to this trap.
    // PC points either to BREAKPOINT instruction or to the next one.
    // The subtraction is unsigned, so any pc below _entry yields a huge
    // distance and is rejected as well.
    bool covers(uintptr_t pc) {
        uintptr_t distance = pc - _entry;
        return distance <= sizeof(instruction_t);
    }

    bool assign(const void* address);
    void install();
    void uninstall();
};
#endif // _TRAP_H

158
src/vmEntry.cpp Executable file → Normal file
View File

@@ -16,18 +16,30 @@
#include <fstream>
#include <dlfcn.h>
#include <stdlib.h>
#include <string.h>
#include "vmEntry.h"
#include "arguments.h"
#include "javaApi.h"
#include "os.h"
#include "profiler.h"
#include "perfEvents.h"
#include "instrument.h"
#include "lockTracer.h"
#include "vmStructs.h"
static Arguments _agent_args;
JavaVM* VM::_vm;
jvmtiEnv* VM::_jvmti = NULL;
int VM::_hotspot_version = 0;
void* VM::_libjvm;
void* VM::_libjava;
AsyncGetCallTrace VM::_asyncGetCallTrace;
JVM_GetManagement VM::_getManagement;
jvmtiError (JNICALL *VM::_orig_RedefineClasses)(jvmtiEnv*, jint, const jvmtiClassDefinition*);
jvmtiError (JNICALL *VM::_orig_RetransformClasses)(jvmtiEnv*, jint, const jclass* classes);
volatile int VM::_in_redefine_classes = 0;
void VM::init(JavaVM* vm, bool attach) {
@@ -36,8 +48,47 @@ void VM::init(JavaVM* vm, bool attach) {
_vm = vm;
_vm->GetEnv((void**)&_jvmti, JVMTI_VERSION_1_0);
char* prop;
if (_jvmti->GetSystemProperty("java.vm.name", &prop) == 0) {
bool is_hotspot = strstr(prop, "OpenJDK") != NULL ||
strstr(prop, "HotSpot") != NULL ||
strstr(prop, "GraalVM") != NULL;
_jvmti->Deallocate((unsigned char*)prop);
if (is_hotspot && _jvmti->GetSystemProperty("java.vm.version", &prop) == 0) {
if (strncmp(prop, "25.", 3) == 0) {
_hotspot_version = 8;
} else if (strncmp(prop, "24.", 3) == 0) {
_hotspot_version = 7;
} else if (strncmp(prop, "20.", 3) == 0) {
_hotspot_version = 6;
} else if ((_hotspot_version = atoi(prop)) < 9) {
_hotspot_version = 9;
}
_jvmti->Deallocate((unsigned char*)prop);
}
if (is_hotspot) {
JVMTIFunctions* functions = *(JVMTIFunctions**)_jvmti;
_orig_RedefineClasses = functions->RedefineClasses;
_orig_RetransformClasses = functions->RetransformClasses;
functions->RedefineClasses = RedefineClassesHook;
functions->RetransformClasses = RetransformClassesHook;
}
}
_libjvm = getLibraryHandle("libjvm.so");
_asyncGetCallTrace = (AsyncGetCallTrace)dlsym(_libjvm, "AsyncGetCallTrace");
_getManagement = (JVM_GetManagement)dlsym(_libjvm, "JVM_GetManagement");
if (attach) {
ready();
}
jvmtiCapabilities capabilities = {0};
capabilities.can_generate_all_class_hook_events = 1;
capabilities.can_retransform_classes = 1;
capabilities.can_retransform_any_class = 1;
capabilities.can_get_bytecodes = 1;
capabilities.can_get_constant_pool = 1;
capabilities.can_get_source_file_name = 1;
@@ -52,11 +103,12 @@ void VM::init(JavaVM* vm, bool attach) {
callbacks.VMDeath = VMDeath;
callbacks.ClassLoad = ClassLoad;
callbacks.ClassPrepare = ClassPrepare;
callbacks.ClassFileLoadHook = Instrument::ClassFileLoadHook;
callbacks.CompiledMethodLoad = Profiler::CompiledMethodLoad;
callbacks.CompiledMethodUnload = Profiler::CompiledMethodUnload;
callbacks.DynamicCodeGenerated = Profiler::DynamicCodeGenerated;
callbacks.ThreadStart = PerfEvents::ThreadStart;
callbacks.ThreadEnd = PerfEvents::ThreadEnd;
callbacks.ThreadStart = Profiler::ThreadStart;
callbacks.ThreadEnd = Profiler::ThreadEnd;
callbacks.MonitorContendedEnter = LockTracer::MonitorContendedEnter;
callbacks.MonitorContendedEntered = LockTracer::MonitorContendedEntered;
_jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));
@@ -69,28 +121,51 @@ void VM::init(JavaVM* vm, bool attach) {
_jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_COMPILED_METHOD_UNLOAD, NULL);
_jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
PerfEvents::init();
_asyncGetCallTrace = (AsyncGetCallTrace)dlsym(RTLD_DEFAULT, "AsyncGetCallTrace");
if (_asyncGetCallTrace == NULL) {
// Unable to locate AsyncGetCallTrace, it is likely that JVM has been started
// by JNI_CreateJavaVM() via dynamically loaded libjvm.so from a C/C++ program
void* libjvm_handle = dlopen("libjvm.so", RTLD_NOW);
if (!libjvm_handle) {
std::cerr << "Failed to load libjvm.so: " << dlerror() << std::endl;
}
// Try loading AGCT after opening libjvm.so
_asyncGetCallTrace = (AsyncGetCallTrace)dlsym(libjvm_handle, "AsyncGetCallTrace");
}
if (attach) {
loadAllMethodIDs(_jvmti);
loadAllMethodIDs(jvmti(), jni());
_jvmti->GenerateEvents(JVMTI_EVENT_DYNAMIC_CODE_GENERATED);
_jvmti->GenerateEvents(JVMTI_EVENT_COMPILED_METHOD_LOAD);
}
}
void VM::loadMethodIDs(jvmtiEnv* jvmti, jclass klass) {
// Run late initialization when JVM is ready
void VM::ready() {
Profiler::_instance.updateSymbols(false);
NativeCodeCache* libjvm = Profiler::_instance.findNativeLibrary((const void*)_asyncGetCallTrace);
if (libjvm != NULL) {
VMStructs::init(libjvm);
}
_libjava = getLibraryHandle("libjava.so");
}
// Returns a handle suitable for dlsym() lookups in the given library.
// If OS::isJavaLibraryVisible() reports true, or if the library cannot be opened,
// RTLD_DEFAULT is returned; a dlopen() failure is also reported on stderr.
void* VM::getLibraryHandle(const char* name) {
    if (OS::isJavaLibraryVisible()) {
        return RTLD_DEFAULT;
    }

    void* handle = dlopen(name, RTLD_LAZY);
    if (handle == NULL) {
        std::cerr << "Failed to load " << name << ": " << dlerror() << std::endl;
        return RTLD_DEFAULT;
    }

    return handle;
}
void VM::loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass) {
if (VMStructs::hasClassLoaderData()) {
VMKlass* vmklass = VMKlass::fromJavaClass(jni, klass);
int method_count = vmklass->methodCount();
if (method_count > 0) {
ClassLoaderData* cld = vmklass->classLoaderData();
cld->lock();
// Workaround for JVM bug: preallocate space for jmethodIDs
// at the beginning of the list (rather than at the end)
for (int i = 0; i < method_count; i += MethodList::SIZE) {
*cld->methodList() = new MethodList(*cld->methodList());
}
cld->unlock();
}
}
jint method_count;
jmethodID* methods;
if (jvmti->GetClassMethods(klass, &method_count, &methods) == 0) {
@@ -98,19 +173,21 @@ void VM::loadMethodIDs(jvmtiEnv* jvmti, jclass klass) {
}
}
void VM::loadAllMethodIDs(jvmtiEnv* jvmti) {
void VM::loadAllMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni) {
jint class_count;
jclass* classes;
if (jvmti->GetLoadedClasses(&class_count, &classes) == 0) {
for (int i = 0; i < class_count; i++) {
loadMethodIDs(jvmti, classes[i]);
loadMethodIDs(jvmti, jni, classes[i]);
}
jvmti->Deallocate((unsigned char*)classes);
}
}
void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
loadAllMethodIDs(jvmti);
ready();
loadAllMethodIDs(jvmti, jni);
// Delayed start of profiler if agent has been loaded at VM bootstrap
Profiler::_instance.run(_agent_args);
}
@@ -119,6 +196,38 @@ void JNICALL VM::VMDeath(jvmtiEnv* jvmti, JNIEnv* jni) {
Profiler::_instance.shutdown(_agent_args);
}
// Replacement for the RedefineClasses entry of the JVMTI function table.
// Delegates to the original function while the _in_redefine_classes counter
// is raised, then reloads method IDs of every non-NULL redefined class.
// The result of the original call is returned unchanged.
jvmtiError VM::RedefineClassesHook(jvmtiEnv* jvmti, jint class_count, const jvmtiClassDefinition* class_definitions) {
    atomicInc(_in_redefine_classes);

    jvmtiError result = _orig_RedefineClasses(jvmti, class_count, class_definitions);

    // jmethodIDs are invalidated after RedefineClasses
    JNIEnv* env = jni();
    int index = 0;
    while (index < class_count) {
        jclass klass = class_definitions[index].klass;
        if (klass != NULL) {
            loadMethodIDs(jvmti, env, klass);
        }
        index++;
    }

    atomicInc(_in_redefine_classes, -1);
    return result;
}
// Replacement for the RetransformClasses entry of the JVMTI function table.
// Delegates to the original function while the _in_redefine_classes counter
// is raised, then reloads method IDs of every non-NULL retransformed class.
// The result of the original call is returned unchanged.
jvmtiError VM::RetransformClassesHook(jvmtiEnv* jvmti, jint class_count, const jclass* classes) {
    atomicInc(_in_redefine_classes);

    jvmtiError result = _orig_RetransformClasses(jvmti, class_count, classes);

    // jmethodIDs are invalidated after RetransformClasses
    JNIEnv* env = jni();
    int index = 0;
    while (index < class_count) {
        jclass klass = classes[index];
        if (klass != NULL) {
            loadMethodIDs(jvmti, env, klass);
        }
        index++;
    }

    atomicInc(_in_redefine_classes, -1);
    return result;
}
extern "C" JNIEXPORT jint JNICALL
Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
@@ -145,7 +254,9 @@ Agent_OnAttach(JavaVM* vm, char* options, void* reserved) {
}
// Save the arguments in case of shutdown
_agent_args = args;
if (args._action == ACTION_START || args._action == ACTION_RESUME) {
_agent_args.save(args);
}
Profiler::_instance.run(args);
return 0;
@@ -154,5 +265,6 @@ Agent_OnAttach(JavaVM* vm, char* options, void* reserved) {
// Entry point invoked when the library is loaded through JNI.
// Initializes the VM wrapper in "attach" mode (second argument of VM::init)
// and registers native methods of the Java API.
extern "C" JNIEXPORT jint JNICALL
JNI_OnLoad(JavaVM* vm, void* reserved) {
    VM::init(vm, true);

    jvmtiEnv* jvmti = VM::jvmti();
    JNIEnv* jni = VM::jni();
    JavaAPI::registerNatives(jvmti, jni);

    return JNI_VERSION_1_6;
}

79
src/vmEntry.h Executable file → Normal file
View File

@@ -20,12 +20,39 @@
#include <jvmti.h>
#if __GNUC__ == 4
# undef JNIEXPORT
# define JNIEXPORT __attribute__((visibility("default")))
#endif
// Denotes ASGCT_CallFrame where method_id has special meaning (not jmethodID)
enum ASGCT_CallFrameType {
BCI_NATIVE_FRAME = -10, // method_id is native function name (char*)
BCI_SYMBOL = -11, // method_id is VMSymbol*
BCI_SYMBOL_OUTSIDE_TLAB = -12, // VMSymbol* specifically for allocations outside TLAB
BCI_THREAD_ID = -13, // method_id designates a thread
BCI_NATIVE_FRAME = -10, // native function name (char*)
BCI_ALLOC = -11, // name of the allocated class
BCI_ALLOC_OUTSIDE_TLAB = -12, // name of the class allocated outside TLAB
BCI_LOCK = -13, // class name of the locked object
BCI_PARK = -14, // class name of the park() blocker
BCI_THREAD_ID = -15, // method_id designates a thread
BCI_ERROR = -16, // method_id is an error string
BCI_INSTRUMENT = -17, // synthetic method_id that should not appear in the call stack
};
// See hotspot/src/share/vm/prims/forte.cpp
// Named failure codes: zero and negative values, one constant per failure reason.
// NOTE(review): presumably these are the values AsyncGetCallTrace reports instead of
// a frame count when it cannot produce a stack trace - confirm against forte.cpp.
enum ASGCT_Failure {
ticks_no_Java_frame = 0,
ticks_no_class_load = -1,
ticks_GC_active = -2,
ticks_unknown_not_Java = -3,
ticks_not_walkable_not_Java = -4,
ticks_unknown_Java = -5,
ticks_not_walkable_Java = -6,
ticks_unknown_state = -7,
ticks_thread_exit = -8,
ticks_deopt = -9,
ticks_safepoint = -10,
ticks_skipped = -11,
// Number of failure codes listed above (0 through -11); not a failure code itself
ASGCT_FAILURE_TYPES = 12
};
typedef struct {
@@ -41,16 +68,39 @@ typedef struct {
typedef void (*AsyncGetCallTrace)(ASGCT_CallTrace*, jint, void*);
// Partial overlay of the function table returned by JVM_GetManagement():
// only the ExecuteDiagnosticCommand entry is declared, the 38 pointer-sized
// slots in front of it are skipped as opaque padding.
// NOTE(review): the padding size must match the JVM's own table layout -
// verify against the targeted JDK versions before changing it.
typedef struct {
void* unused[38];
jstring (JNICALL *ExecuteDiagnosticCommand)(JNIEnv*, jstring);
} VMManagement;
typedef VMManagement* (*JVM_GetManagement)(jint);
// Partial overlay of the function table that a jvmtiEnv points to.
// VM::init() reinterprets *(JVMTIFunctions**)jvmti as this type in order to
// replace the RedefineClasses and RetransformClasses entries with hooks.
// Only these two entries are declared: RedefineClasses occupies pointer slot 86,
// RetransformClasses occupies slot 151 (86 + 1 + 64); the rest is opaque padding.
typedef struct {
void* unused1[86];
jvmtiError (JNICALL *RedefineClasses)(jvmtiEnv*, jint, const jvmtiClassDefinition*);
void* unused2[64];
jvmtiError (JNICALL *RetransformClasses)(jvmtiEnv*, jint, const jclass*);
} JVMTIFunctions;
class VM {
private:
static JavaVM* _vm;
static jvmtiEnv* _jvmti;
static JVM_GetManagement _getManagement;
static jvmtiError (JNICALL *_orig_RedefineClasses)(jvmtiEnv*, jint, const jvmtiClassDefinition*);
static jvmtiError (JNICALL *_orig_RetransformClasses)(jvmtiEnv*, jint, const jclass* classes);
static volatile int _in_redefine_classes;
static int _hotspot_version;
static void loadMethodIDs(jvmtiEnv* jvmti, jclass klass);
static void loadAllMethodIDs(jvmtiEnv* jvmti);
static void ready();
static void* getLibraryHandle(const char* name);
static void loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass);
static void loadAllMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni);
public:
static void* _libjvm;
static void* _libjava;
static AsyncGetCallTrace _asyncGetCallTrace;
static void init(JavaVM* vm, bool attach);
@@ -64,6 +114,18 @@ class VM {
return _vm->GetEnv((void**)&jni, JNI_VERSION_1_6) == 0 ? jni : NULL;
}
static VMManagement* management() {
return _getManagement != NULL ? _getManagement(0x20030000) : NULL;
}
static int hotspot_version() {
return _hotspot_version;
}
static bool inRedefineClasses() {
return _in_redefine_classes > 0;
}
static void JNICALL VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread);
static void JNICALL VMDeath(jvmtiEnv* jvmti, JNIEnv* jni);
@@ -72,8 +134,11 @@ class VM {
}
static void JNICALL ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, jclass klass) {
loadMethodIDs(jvmti, klass);
loadMethodIDs(jvmti, jni, klass);
}
static jvmtiError JNICALL RedefineClassesHook(jvmtiEnv* jvmti, jint class_count, const jvmtiClassDefinition* class_definitions);
static jvmtiError JNICALL RetransformClassesHook(jvmtiEnv* jvmti, jint class_count, const jclass* classes);
};
#endif // _VMENTRY_H

185
src/vmStructs.cpp Executable file → Normal file
View File

@@ -14,22 +14,51 @@
* limitations under the License.
*/
#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include "vmStructs.h"
#include "codeCache.h"
#include "vmEntry.h"
NativeCodeCache* VMStructs::_libjvm = NULL;
bool VMStructs::_has_class_names = false;
bool VMStructs::_has_class_loader_data = false;
bool VMStructs::_has_thread_bridge = false;
bool VMStructs::_has_perm_gen = false;
int VMStructs::_klass_name_offset = -1;
int VMStructs::_symbol_length_offset = -1;
int VMStructs::_symbol_length_and_refcount_offset = -1;
int VMStructs::_symbol_body_offset = -1;
int VMStructs::_class_klass_offset = -1;
int VMStructs::_class_loader_data_offset = -1;
int VMStructs::_methods_offset = -1;
int VMStructs::_thread_osthread_offset = -1;
int VMStructs::_thread_anchor_offset = -1;
int VMStructs::_thread_state_offset = -1;
int VMStructs::_osthread_id_offset = -1;
bool VMStructs::_has_perm_gen = false;
int VMStructs::_anchor_sp_offset = -1;
int VMStructs::_anchor_pc_offset = -1;
int VMStructs::_frame_size_offset = -1;
int VMStructs::_is_gc_active_offset = -1;
char* VMStructs::_collected_heap_addr = NULL;
static uintptr_t readSymbol(NativeCodeCache* lib, const char* symbol_name) {
const void* symbol = lib->findSymbol(symbol_name);
jfieldID VMStructs::_eetop;
jfieldID VMStructs::_tid;
jfieldID VMStructs::_klass = NULL;
int VMStructs::_tls_index = -1;
intptr_t VMStructs::_env_offset;
VMStructs::GetStackTraceFunc VMStructs::_get_stack_trace = NULL;
VMStructs::UnsafeParkFunc VMStructs::_unsafe_park = NULL;
VMStructs::FindBlobFunc VMStructs::_find_blob = NULL;
VMStructs::LockFunc VMStructs::_lock_func;
VMStructs::LockFunc VMStructs::_unlock_func;
uintptr_t VMStructs::readSymbol(const char* symbol_name) {
const void* symbol = _libjvm->findSymbol(symbol_name);
if (symbol == NULL) {
// Avoid JVM crash in case of missing symbols
return 0;
@@ -37,27 +66,35 @@ static uintptr_t readSymbol(NativeCodeCache* lib, const char* symbol_name) {
return *(uintptr_t*)symbol;
}
bool VMStructs::init(NativeCodeCache* libjvm) {
if (available()) {
return true;
}
void VMStructs::init(NativeCodeCache* libjvm) {
_libjvm = libjvm;
uintptr_t entry = readSymbol(libjvm, "gHotSpotVMStructs");
uintptr_t stride = readSymbol(libjvm, "gHotSpotVMStructEntryArrayStride");
uintptr_t type_offset = readSymbol(libjvm, "gHotSpotVMStructEntryTypeNameOffset");
uintptr_t field_offset = readSymbol(libjvm, "gHotSpotVMStructEntryFieldNameOffset");
uintptr_t offset_offset = readSymbol(libjvm, "gHotSpotVMStructEntryOffsetOffset");
uintptr_t address_offset = readSymbol(libjvm, "gHotSpotVMStructEntryAddressOffset");
initOffsets();
initJvmFunctions();
JNIEnv* env = VM::jni();
initThreadBridge(env);
initLogging(env);
env->ExceptionClear();
}
void VMStructs::initOffsets() {
uintptr_t entry = readSymbol("gHotSpotVMStructs");
uintptr_t stride = readSymbol("gHotSpotVMStructEntryArrayStride");
uintptr_t type_offset = readSymbol("gHotSpotVMStructEntryTypeNameOffset");
uintptr_t field_offset = readSymbol("gHotSpotVMStructEntryFieldNameOffset");
uintptr_t offset_offset = readSymbol("gHotSpotVMStructEntryOffsetOffset");
uintptr_t address_offset = readSymbol("gHotSpotVMStructEntryAddressOffset");
if (entry == 0 || stride == 0) {
return false;
return;
}
while (true) {
const char* type = *(const char**)(entry + type_offset);
const char* field = *(const char**)(entry + field_offset);
if (type == NULL || field == NULL) {
return available();
break;
}
if (strcmp(type, "Klass") == 0) {
@@ -67,25 +104,137 @@ bool VMStructs::init(NativeCodeCache* libjvm) {
} else if (strcmp(type, "Symbol") == 0) {
if (strcmp(field, "_length") == 0) {
_symbol_length_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_length_and_refcount") == 0) {
_symbol_length_and_refcount_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_body") == 0) {
_symbol_body_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "InstanceKlass") == 0) {
if (strcmp(field, "_class_loader_data") == 0) {
_class_loader_data_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_methods") == 0) {
_methods_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "java_lang_Class") == 0) {
if (strcmp(field, "_klass_offset") == 0) {
_class_klass_offset = **(int**)(entry + address_offset);
int klass_offset = **(int**)(entry + address_offset);
_klass = (jfieldID)(uintptr_t)(klass_offset << 2 | 2);
}
} else if (strcmp(type, "JavaThread") == 0) {
if (strcmp(field, "_osthread") == 0) {
_thread_osthread_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_anchor") == 0) {
_thread_anchor_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_thread_state") == 0) {
_thread_state_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "OSThread") == 0) {
if (strcmp(field, "_thread_id") == 0) {
_osthread_id_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "JavaFrameAnchor") == 0) {
if (strcmp(field, "_last_Java_sp") == 0) {
_anchor_sp_offset = *(int*)(entry + offset_offset);
} else if (strcmp(field, "_last_Java_pc") == 0) {
_anchor_pc_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "CodeBlob") == 0) {
if (strcmp(field, "_frame_size") == 0) {
_frame_size_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "Universe") == 0) {
if (strcmp(field, "_collectedHeap") == 0) {
_collected_heap_addr = **(char***)(entry + address_offset);
}
} else if (strcmp(type, "CollectedHeap") == 0) {
if (strcmp(field, "_is_gc_active") == 0) {
_is_gc_active_offset = *(int*)(entry + offset_offset);
}
} else if (strcmp(type, "PermGen") == 0) {
_has_perm_gen = true;
}
entry += stride;
}
_has_class_names = _klass_name_offset >= 0
&& (_symbol_length_offset >= 0 || _symbol_length_and_refcount_offset >= 0)
&& _symbol_body_offset >= 0
&& _klass != NULL;
}
void VMStructs::initJvmFunctions() {
_get_stack_trace = (GetStackTraceFunc)_libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP15_jvmtiFrameInfoPi");
if (_get_stack_trace == NULL) {
_get_stack_trace = (GetStackTraceFunc)_libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP14jvmtiFrameInfoPi");
}
_unsafe_park = (UnsafeParkFunc)_libjvm->findSymbol("Unsafe_Park");
if (_unsafe_park == NULL) {
// In some macOS builds of JDK 11 Unsafe_Park appears to have a C++ decorated name
_unsafe_park = (UnsafeParkFunc)_libjvm->findSymbol("_ZL11Unsafe_ParkP7JNIEnv_P8_jobjecthl");
}
if (_frame_size_offset >= 0) {
_find_blob = (FindBlobFunc)_libjvm->findSymbol("_ZN9CodeCache16find_blob_unsafeEPv");
if (_find_blob == NULL) {
_find_blob = (FindBlobFunc)_libjvm->findSymbol("_ZN9CodeCache9find_blobEPv");
}
}
if (VM::hotspot_version() == 8 && _class_loader_data_offset >= 0 && _methods_offset >= 0 && _klass != NULL) {
_lock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor28lock_without_safepoint_checkEv");
_unlock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor6unlockEv");
_has_class_loader_data = _lock_func != NULL && _unlock_func != NULL;
}
}
// Sets up the mapping between Java threads, VMThread pointers and JNIEnv pointers.
// On success _eetop, _tid, _tls_index and _env_offset are initialized and
// _has_thread_bridge becomes true; on any failure the function returns early
// and _has_thread_bridge is not set.
void VMStructs::initThreadBridge(JNIEnv* env) {
// Get eetop field - a bridge from Java Thread to VMThread
jthread thread;
if (VM::jvmti()->GetCurrentThread(&thread) != 0) {
return;
}
// Resolve the "eetop" and "tid" long fields on the class of the current thread object
jclass thread_class = env->GetObjectClass(thread);
_eetop = env->GetFieldID(thread_class, "eetop", "J");
_tid = env->GetFieldID(thread_class, "tid", "J");
if (_eetop == NULL || _tid == NULL) {
return;
}
// VMThread pointer of the current thread, read from its eetop field
VMThread* vm_thread = VMThread::fromJavaThread(env, thread);
if (vm_thread == NULL) {
return;
}
// Workaround for JDK-8132510: it's not safe to call GetEnv() inside a signal handler
// since JDK 9, so we do it only for threads already registered in ThreadLocalStorage
// Probe pthread keys 0..1023 for the one whose value in the current thread
// equals the VMThread pointer; VMThread::current() later reads that key
for (int i = 0; i < 1024; i++) {
if (pthread_getspecific((pthread_key_t)i) == vm_thread) {
_tls_index = i;
break;
}
}
if (_tls_index < 0) {
return;
}
// Distance between the JNIEnv and the VMThread of the current thread;
// VMThread::fromEnv() subtracts it from a JNIEnv pointer
_env_offset = (intptr_t)env - (intptr_t)vm_thread;
_has_thread_bridge = true;
}
// On HotSpot 15 and newer, runs the "VM.log" diagnostic command through
// the management interface. Does nothing if the interface is not available.
void VMStructs::initLogging(JNIEnv* env) {
    // Workaround for JDK-8238460
    if (VM::hotspot_version() < 15) {
        return;
    }

    VMManagement* management = VM::management();
    if (management == NULL) {
        return;
    }

    jstring command = env->NewStringUTF("VM.log what=jni+resolve=error");
    management->ExecuteDiagnosticCommand(env, command);
}
// Returns the value stored for the calling thread under the pthread key
// discovered by VMStructs::initThreadBridge(), interpreted as a VMThread pointer
VMThread* VMThread::current() {
    pthread_key_t key = (pthread_key_t)_tls_index;
    void* value = pthread_getspecific(key);
    return (VMThread*)value;
}

183
src/vmStructs.h Executable file → Normal file
View File

@@ -17,36 +17,103 @@
#ifndef _VMSTRUCTS_H
#define _VMSTRUCTS_H
#include <jvmti.h>
#include <stdint.h>
#include "codeCache.h"
class VMStructs {
protected:
static NativeCodeCache* _libjvm;
static bool _has_class_names;
static bool _has_class_loader_data;
static bool _has_thread_bridge;
static bool _has_perm_gen;
static int _klass_name_offset;
static int _symbol_length_offset;
static int _symbol_length_and_refcount_offset;
static int _symbol_body_offset;
static int _class_klass_offset;
static int _class_loader_data_offset;
static int _methods_offset;
static int _thread_osthread_offset;
static int _thread_anchor_offset;
static int _thread_state_offset;
static int _osthread_id_offset;
static bool _has_perm_gen;
static int _anchor_sp_offset;
static int _anchor_pc_offset;
static int _frame_size_offset;
static int _is_gc_active_offset;
static char* _collected_heap_addr;
static jfieldID _eetop;
static jfieldID _tid;
static jfieldID _klass;
static int _tls_index;
static intptr_t _env_offset;
typedef void* (*FindBlobFunc)(const void*);
static FindBlobFunc _find_blob;
typedef void (*LockFunc)(void*);
static LockFunc _lock_func;
static LockFunc _unlock_func;
static uintptr_t readSymbol(const char* symbol_name);
static void initOffsets();
static void initJvmFunctions();
static void initThreadBridge(JNIEnv* env);
static void initLogging(JNIEnv* env);
const char* at(int offset) {
return (const char*)this + offset;
}
public:
static bool init(NativeCodeCache* libjvm);
static void init(NativeCodeCache* libjvm);
static bool available() {
return _klass_name_offset >= 0
&& _symbol_length_offset >= 0
&& _symbol_body_offset >= 0
&& _class_klass_offset >= 0;
static NativeCodeCache* libjvm() {
return _libjvm;
}
static bool hasPermGen() {
return _has_perm_gen;
static bool hasClassNames() {
return _has_class_names;
}
static bool hasClassLoaderData() {
return _has_class_loader_data;
}
static bool hasThreadBridge() {
return _has_thread_bridge;
}
typedef jvmtiError (*GetStackTraceFunc)(void* self, void* thread,
jint start_depth, jint max_frame_count,
jvmtiFrameInfo* frame_buffer, jint* count_ptr);
static GetStackTraceFunc _get_stack_trace;
typedef void (JNICALL *UnsafeParkFunc)(JNIEnv*, jobject, jboolean, jlong);
static UnsafeParkFunc _unsafe_park;
};
// A fixed-size node of a singly linked list with SIZE method slots.
// VM::loadMethodIDs() prepends such nodes to the list obtained from
// ClassLoaderData::methodList().
// NOTE(review): the field order and sizes presumably have to match the JVM's
// own list node layout - do not reorder members without confirming.
class MethodList {
  public:
    enum { SIZE = 8 };

  private:
    intptr_t _method[SIZE];
    int _ptr;
    MethodList* _next;
    int _padding;

  public:
    // Creates a node linked in front of 'next', with every slot set to 0x37.
    // NOTE(review): 0x37 is presumably the marker the JVM uses for a free slot - confirm.
    MethodList(MethodList* next) : _ptr(0), _next(next), _padding(0) {
        intptr_t* slot = _method;
        intptr_t* const end = _method + SIZE;
        while (slot != end) {
            *slot++ = 0x37;
        }
    }
};
@@ -54,7 +121,11 @@ class VMStructs {
class VMSymbol : VMStructs {
public:
unsigned short length() {
return *(unsigned short*) at(_symbol_length_offset);
if (_symbol_length_offset >= 0) {
return *(unsigned short*) at(_symbol_length_offset);
} else {
return *(unsigned int*) at(_symbol_length_and_refcount_offset) >> 16;
}
}
const char* body() {
@@ -62,8 +133,39 @@ class VMSymbol : VMStructs {
}
};
// View over a JVM ClassLoaderData structure, accessed by hard-coded offsets
class ClassLoaderData : VMStructs {
  private:
    // Reads the pointer stored at offset 3 * sizeof(uintptr_t);
    // it is the argument passed to the lock / unlock functions
    void* mutex() {
        const char* field = at(sizeof(uintptr_t) * 3);
        return *(void**) field;
    }

  public:
    void lock() {
        void* monitor = mutex();
        _lock_func(monitor);
    }

    void unlock() {
        void* monitor = mutex();
        _unlock_func(monitor);
    }

    // Address of the field at offset 6 * sizeof(uintptr_t) + 8,
    // which holds the head of the MethodList chain
    MethodList** methodList() {
        const char* field = at(sizeof(uintptr_t) * 6 + 8);
        return (MethodList**) field;
    }
};
class VMKlass : VMStructs {
public:
static VMKlass* fromJavaClass(JNIEnv* env, jclass cls) {
if (_has_perm_gen) {
jobject klassOop = env->GetObjectField(cls, _klass);
return (VMKlass*)(*(uintptr_t**)klassOop + 2);
} else if (sizeof(VMKlass*) == 8) {
return (VMKlass*)(uintptr_t)env->GetLongField(cls, _klass);
} else {
return (VMKlass*)(uintptr_t)env->GetIntField(cls, _klass);
}
}
static VMKlass* fromHandle(uintptr_t handle) {
if (_has_perm_gen) {
// On JDK 7 KlassHandle is a pointer to klassOop, hence one more indirection
@@ -76,18 +178,34 @@ class VMKlass : VMStructs {
VMSymbol* name() {
return *(VMSymbol**) at(_klass_name_offset);
}
};
class java_lang_Class : VMStructs {
public:
VMKlass* klass() {
return *(VMKlass**) at(_class_klass_offset);
ClassLoaderData* classLoaderData() {
return *(ClassLoaderData**) at(_class_loader_data_offset);
}
int methodCount() {
int* methods = *(int**) at(_methods_offset);
return methods == NULL ? 0 : *methods & 0xffff;
}
};
class VMThread : VMStructs {
public:
static bool available() {
static VMThread* current();
static VMThread* fromJavaThread(JNIEnv* env, jthread thread) {
return (VMThread*)(uintptr_t)env->GetLongField(thread, _eetop);
}
static VMThread* fromEnv(JNIEnv* env) {
return (VMThread*)((intptr_t)env - _env_offset);
}
static jlong javaThreadId(JNIEnv* env, jthread thread) {
return env->GetLongField(thread, _tid);
}
static bool hasNativeId() {
return _thread_osthread_offset >= 0 && _osthread_id_offset >= 0;
}
@@ -95,6 +213,37 @@ class VMThread : VMStructs {
const char* osthread = *(const char**) at(_thread_osthread_offset);
return *(int*)(osthread + _osthread_id_offset);
}
int state() {
return _thread_state_offset >= 0 ? *(int*) at(_thread_state_offset) : 0;
}
uintptr_t& lastJavaSP() {
return *(uintptr_t*) (at(_thread_anchor_offset) + _anchor_sp_offset);
}
uintptr_t& lastJavaPC() {
return *(uintptr_t*) (at(_thread_anchor_offset) + _anchor_pc_offset);
}
};
// View over a JVM code blob located through the _find_blob function
class RuntimeStub : VMStructs {
  public:
    // Returns the blob reported by _find_blob for the given pc,
    // or NULL if the lookup function has not been resolved
    static RuntimeStub* findBlob(const void* pc) {
        if (_find_blob == NULL) {
            return NULL;
        }
        return (RuntimeStub*)_find_blob(pc);
    }

    // Reads the int stored at _frame_size_offset inside the blob
    int frameSize() {
        const int* frame_size = (const int*) at(_frame_size_offset);
        return *frame_size;
    }
};
// Accessor for the "GC is active" flag of the JVM heap object
class CollectedHeap : VMStructs {
  public:
    // Returns true if the byte at _is_gc_active_offset of the collected heap is non-zero.
    // Returns false when the heap address or the field offset is unknown.
    static bool isGCActive() {
        if (_collected_heap_addr == NULL || _is_gc_active_offset < 0) {
            return false;
        }
        return _collected_heap_addr[_is_gc_active_offset] != 0;
    }
};
#endif // _VMSTRUCTS_H

149
src/wallClock.cpp Normal file
View File

@@ -0,0 +1,149 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <signal.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include "wallClock.h"
#include "profiler.h"
// Maximum number of threads sampled in one iteration. This limit serves as a throttle
// when generating profiling signals. Otherwise applications with too many threads may
// suffer from a big profiling overhead. Also, keeping this limit low enough helps
// to avoid contention on a spin lock inside Profiler::recordSample().
const int THREADS_PER_TICK = 8;
// Set the hard limit for thread walking interval to 100 microseconds.
// Smaller intervals are practically unusable due to large overhead.
// The value is expressed in nanoseconds (100000 ns = 100 us), the unit
// that WallClock::sleep() takes.
const long MIN_INTERVAL = 100000;
// Stop profiling thread with this signal. The same signal is used inside JDK to interrupt I/O operations.
const int WAKEUP_SIGNAL = SIGIO;
// Definitions of the static members: both are written by start() and read
// by the timer loop and by the signal handler
long WallClock::_interval;
bool WallClock::_sample_idle_threads;
// Signal handler that records one sample for the interrupted thread.
// The thread state is queried from the profiler only when idle threads are sampled;
// otherwise the sample is always attributed to THREAD_RUNNING.
void WallClock::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
    ExecutionEvent event;
    if (_sample_idle_threads) {
        event._thread_state = Profiler::_instance.getThreadState(ucontext);
    } else {
        event._thread_state = THREAD_RUNNING;
    }

    Profiler::_instance.recordSample(ucontext, _interval, 0, &event);
}
// Handler installed for WAKEUP_SIGNAL by start(). It is intentionally empty:
// stop() sends this signal to the timer thread only for the side effect
// of delivering a signal, not to run any code.
void WallClock::wakeupHandler(int signo) {
// Dummy handler for interrupting syscalls
}
// Shortens the interval when there are more threads than can be signalled in one tick:
// the interval is divided by the number of ticks needed to visit all threads,
// i.e. ceil(thread_count / THREADS_PER_TICK). Otherwise it is returned unchanged.
long WallClock::adjustInterval(long interval, int thread_count) {
    if (thread_count <= THREADS_PER_TICK) {
        return interval;
    }

    int ticks_per_cycle = (thread_count + THREADS_PER_TICK - 1) / THREADS_PER_TICK;
    return interval / ticks_per_cycle;
}
// Suspends the calling thread for the given number of nanoseconds.
// The sleep is not resumed if nanosleep() returns early.
void WallClock::sleep(long interval) {
    const long NANOS_PER_SECOND = 1000000000;

    struct timespec timeout;
    timeout.tv_sec = interval / NANOS_PER_SECOND;
    timeout.tv_nsec = interval % NANOS_PER_SECOND;

    nanosleep(&timeout, NULL);
}
// Starts the engine: selects the sampling mode and interval from the arguments,
// installs signal handlers and launches the timer thread.
// Returns an error if the interval is negative or the thread cannot be created.
Error WallClock::start(Arguments& args) {
    if (args._interval < 0) {
        return Error("interval must be positive");
    }

    // Idle threads are sampled only when the requested event is EVENT_WALL
    _sample_idle_threads = strcmp(args._event_desc, EVENT_WALL) == 0;

    if (args._interval) {
        _interval = args._interval;
    } else if (_sample_idle_threads) {
        // Increase default interval for wall clock mode due to larger number of sampled threads
        _interval = DEFAULT_INTERVAL * 5;
    } else {
        _interval = DEFAULT_INTERVAL;
    }

    OS::installSignalHandler(SIGVTALRM, signalHandler);
    OS::installSignalHandler(WAKEUP_SIGNAL, NULL, wakeupHandler);

    _running = true;

    int create_result = pthread_create(&_thread, NULL, threadEntry, this);
    if (create_result != 0) {
        return Error("Unable to create timer thread");
    }

    return Error::OK;
}
// Stops the timer thread: clears the _running flag checked by timerLoop(),
// sends WAKEUP_SIGNAL to the thread and waits until the thread exits.
void WallClock::stop() {
_running = false;
// The signal is meant to cut short a sleep in the timer thread,
// so that the cleared flag is noticed without waiting for the full interval
pthread_kill(_thread, WAKEUP_SIGNAL);
pthread_join(_thread, NULL);
}
// Body of the timer thread. While _running is set, each iteration sends SIGVTALRM
// to at most THREADS_PER_TICK threads taken from the OS thread list and then sleeps.
void WallClock::timerLoop() {
// ID of the timer thread itself; it is never signalled
int self = OS::threadId();
// Filter and mode settings are read once, before entering the loop
ThreadFilter* thread_filter = Profiler::_instance.threadFilter();
bool thread_filter_enabled = thread_filter->enabled();
bool sample_idle_threads = _sample_idle_threads;
ThreadList* thread_list = OS::listThreads();
long long next_cycle_time = OS::nanotime();
while (_running) {
// NOTE(review): _enabled is not declared in WallClock - presumably inherited from Engine
if (!_enabled) {
sleep(_interval);
continue;
}
if (sample_idle_threads) {
// Try to keep the wall clock interval stable, regardless of the number of profiled threads
int estimated_thread_count = thread_filter_enabled ? thread_filter->size() : thread_list->size();
next_cycle_time += adjustInterval(_interval, estimated_thread_count);
}
// 'count' grows only when a signal has actually been sent,
// so skipped threads do not consume the per-tick budget
for (int count = 0; count < THREADS_PER_TICK; ) {
int thread_id = thread_list->next();
if (thread_id == -1) {
// End of the thread list: start over on the next tick
thread_list->rewind();
break;
}
if (thread_id == self || (thread_filter_enabled && !thread_filter->accept(thread_id))) {
continue;
}
// When idle threads are not sampled, only threads reported as running are signalled
if (sample_idle_threads || OS::threadState(thread_id) == THREAD_RUNNING) {
if (OS::sendSignalToThread(thread_id, SIGVTALRM)) {
count++;
}
}
}
if (sample_idle_threads) {
// Sleep until the scheduled time of the next cycle, but never less than MIN_INTERVAL;
// if the schedule cannot be kept, restart it from the current time
long long current_time = OS::nanotime();
if (next_cycle_time - current_time > MIN_INTERVAL) {
sleep(next_cycle_time - current_time);
} else {
next_cycle_time = current_time + MIN_INTERVAL;
sleep(MIN_INTERVAL);
}
} else {
sleep(_interval);
}
}
delete thread_list;
}

60
src/wallClock.h Normal file
View File

@@ -0,0 +1,60 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _WALLCLOCK_H
#define _WALLCLOCK_H
#include <jvmti.h>
#include <signal.h>
#include <pthread.h>
#include "engine.h"
// Profiling engine that samples threads from a dedicated timer thread.
// Depending on _sample_idle_threads it reports itself as EVENT_WALL or EVENT_CPU (see name()).
class WallClock : public Engine {
private:
// Sampling interval. NOTE(review): presumably in nanoseconds, matching units() - confirm
static long _interval;
// Selects the event name returned by name(): EVENT_WALL when true, EVENT_CPU otherwise
static bool _sample_idle_threads;
// Run flag of the timer thread
volatile bool _running;
// Handle of the timer thread
pthread_t _thread;
// Main loop executed by the timer thread
void timerLoop();
// pthread-style entry point: casts the argument back to WallClock* and runs timerLoop()
static void* threadEntry(void* wall_clock) {
((WallClock*)wall_clock)->timerLoop();
return NULL;
}
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
static void wakeupHandler(int signo);
static long adjustInterval(long interval, int thread_count);
static void sleep(long interval);
public:
// Event name of this engine: EVENT_WALL or EVENT_CPU, depending on _sample_idle_threads
const char* name() {
return _sample_idle_threads ? EVENT_WALL : EVENT_CPU;
}
// Units reported for this engine: always "ns"
const char* units() {
return "ns";
}
Error start(Arguments& args);
void stop();
};
#endif // _WALLCLOCK_H

9
test/AllocatingTarget.java Executable file → Normal file
View File

@@ -1,4 +1,4 @@
import java.util.concurrent.ThreadLocalRandom;
import java.util.Random;
public class AllocatingTarget implements Runnable {
public static volatile Object sink;
@@ -10,13 +10,14 @@ public class AllocatingTarget implements Runnable {
@Override
public void run() {
Random random = new Random();
while (true) {
allocate();
allocate(random);
}
}
private static void allocate() {
if (ThreadLocalRandom.current().nextBoolean()) {
private static void allocate(Random random) {
if (random.nextBoolean()) {
sink = new int[128 * 1000];
} else {
sink = new Integer[128 * 1000];

21
test/LoadLibraryTest.java Normal file
View File

@@ -0,0 +1,21 @@
import java.lang.management.ClassLoadingMXBean;
import java.lang.management.ManagementFactory;
/**
 * Profiling target for load-library-test.sh.
 *
 * It idles for 200 naps of 10 ms, then obtains the class loading MXBean
 * (late load of libmanagement.so) and keeps querying it in a busy loop.
 */
class LoadLibraryTest {

    public static void main(String[] args) throws Exception {
        // 200 naps of 10 ms each before the management API is touched
        int napsLeft = 200;
        while (napsLeft > 0) {
            Thread.sleep(10);
            napsLeft--;
        }

        // Late load of libmanagement.so
        ClassLoadingMXBean classLoading = ManagementFactory.getClassLoadingMXBean();

        // Accumulate the counters until the running sum turns negative
        long total = 0;
        do {
            total += classLoading.getLoadedClassCount();
            total += classLoading.getTotalLoadedClassCount();
            total += classLoading.getUnloadedClassCount();
        } while (total >= 0);
    }
}

1
test/Target.java Executable file → Normal file
View File

@@ -1,4 +1,3 @@
import java.util.Scanner;
import java.io.File;
class Target {

35
test/ThreadsTarget.java Normal file
View File

@@ -0,0 +1,35 @@
import java.math.BigInteger;
/**
 * Profiling target that starts two CPU-bound threads:
 * "ThreadEarlyEnd", which spins for 300 ms, and "ThreadWillBeRenamed",
 * which renames itself to "RenamedThread" and then spins for 1000 ms.
 */
public class ThreadsTarget {

    public static void main(String[] args) {
        Thread earlyEnd = new Thread(new Runnable() {
            @Override
            public void run() {
                methodForThreadEarlyEnd();
            }
        }, "ThreadEarlyEnd");
        earlyEnd.start();

        Thread renamed = new Thread(new Runnable() {
            @Override
            public void run() {
                // The rename happens from inside the thread, before the busy work starts
                Thread.currentThread().setName("RenamedThread");
                methodForRenamedThread();
            }
        }, "ThreadWillBeRenamed");
        renamed.start();
    }

    /** Burns CPU by computing successive probable primes for 300 ms. */
    static void methodForThreadEarlyEnd() {
        final long startedAt = System.currentTimeMillis();
        BigInteger candidate = BigInteger.ZERO;
        for (;;) {
            if (System.currentTimeMillis() - startedAt >= 300) {
                return;
            }
            candidate = candidate.nextProbablePrime();
        }
    }

    /** Burns CPU by computing successive probable primes for 1000 ms. */
    static void methodForRenamedThread() {
        final long startedAt = System.currentTimeMillis();
        BigInteger candidate = BigInteger.ZERO;
        for (;;) {
            if (System.currentTimeMillis() - startedAt >= 1000) {
                return;
            }
            candidate = candidate.nextProbablePrime();
        }
    }
}

View File

@@ -3,16 +3,15 @@
set -e # exit on any failure
set -x # print all executed lines
if [ -z "${JAVA_HOME}" ]
then
if [ -z "${JAVA_HOME}" ]; then
echo "JAVA_HOME is not set"
exit 1
fi
(
cd $(dirname $0)
if [ "AllocatingTarget.class" -ot "AllocatingTarget.java" ]
then
if [ "AllocatingTarget.class" -ot "AllocatingTarget.java" ]; then
${JAVA_HOME}/bin/javac AllocatingTarget.java
fi
@@ -32,6 +31,6 @@ fi
fi
}
assert_string "AllocThread-1;.*AllocatingTarget.allocate;.*java.lang.Integer\[\]"
assert_string "AllocThread-2;.*AllocatingTarget.allocate;.*int\[\]"
assert_string "\[AllocThread-1 tid=[0-9]\+\];.*AllocatingTarget.allocate;.*java.lang.Integer\[\]"
assert_string "\[AllocThread-2 tid=[0-9]\+\];.*AllocatingTarget.allocate;.*int\[\]"
)

35
test/load-library-test.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
#
# Checks that frames from a native library loaded after the agent has started
# show up in the profile: runs LoadLibraryTest with the agent attached,
# profiles it for 5 seconds and expects "Java_sun_management" in the
# collapsed output.
#
# Requires JAVA_HOME to point to a JDK.

set -e  # exit on any failure
set -x  # print all executed lines

if [ -z "${JAVA_HOME}" ]; then
  echo "JAVA_HOME is not set"
  exit 1
fi

(
  # Quoted so that a checkout path containing spaces still works
  cd "$(dirname "$0")"

  if [ "LoadLibraryTest.class" -ot "LoadLibraryTest.java" ]; then
    "${JAVA_HOME}/bin/javac" LoadLibraryTest.java
  fi

  "${JAVA_HOME}/bin/java" -agentpath:../build/libasyncProfiler.so LoadLibraryTest &

  FILENAME=/tmp/java.trace
  JAVAPID=$!

  # LoadLibraryTest is a long-running busy loop. Because of 'set -e', a failing
  # profiler.sh would abort the script before the explicit kill below and leave
  # the JVM running, so make sure it is terminated on every exit path.
  trap 'kill "$JAVAPID" 2>/dev/null || true' EXIT

  sleep 1     # allow the Java runtime to initialize
  ../profiler.sh -f "$FILENAME" -o collapsed -d 5 -i 1ms "$JAVAPID"

  kill "$JAVAPID"
  # The process has been signalled; do not signal the same PID again on exit
  trap - EXIT

  # Fails the test unless the profile contains a line matching pattern $1
  function assert_string() {
    if ! grep -q "$1" "$FILENAME"; then
      exit 1
    fi
  }

  assert_string "Java_sun_management"
)

Some files were not shown because too many files have changed in this diff Show More