Compare commits

...

1019 Commits

Author SHA1 Message Date
Andrei Pangin
f6ca3c1ff8 dumpOtlp() should accept Counter argument (#1728) 2026-04-15 19:38:08 +01:00
Andrei Pangin
86adc1605a Updated CHANGELOG 2026-04-15 16:24:06 +01:00
Andrei Pangin
804df3ac8e #1203: Fix "Instance field not found" when using -Xcheck:jni on JDK 8 2026-04-15 16:22:31 +01:00
Andrei Pangin
8aab346c3b #1727: Allocation profile has wrong units in OTLP format 2026-04-14 15:56:20 +01:00
Andrei Pangin
7bd911a007 Release 4.4 2026-04-13 22:33:42 +01:00
Andrei Pangin
2df2733d1d #1676: Make dwarf stack walking mode an alias for vm
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2026-04-13 21:51:25 +01:00
Andrei Pangin
4d5441f2cd Retry vDSO unwinding on AArch64 using the default frame (#1724) 2026-04-13 20:05:15 +01:00
Andrei Pangin
cc9e91bd8f Retry vDSO unwinding on AArch64 using the default frame (#1724)
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2026-04-13 15:53:56 +01:00
Andrei Pangin
e899de6a9c #1720: Dark mode toggle in HTML FlameGraph
Co-authored-by: Enrique Wood <ewoodg@hotmail.com>
2026-04-11 02:01:37 +01:00
Andrei Pangin
fbc3942095 Do not walk past virtual thread continuation barriers 2026-04-10 18:49:06 +01:00
Andrei Pangin
6afb9572c1 Use streq/startsWith instead of strcmp/strncmp 2026-04-10 17:25:11 +01:00
Hope Kim
f763e195ee Correct mmap failure check on macOS (#1713) 2026-04-08 22:45:37 +01:00
Bara' Hasheesh
f1b87ead07 Cleanup JVM detection for the test runner (#1717) 2026-04-01 15:48:08 +01:00
Andrei Pangin
4dda6c40af #1716: Wall-clock Heatmap does not count samples correctly 2026-04-01 01:17:50 +01:00
Andrei Pangin
264b8ab5da #1715: Fix Zing crash when profiling cpu+wall together 2026-03-31 00:19:39 +01:00
Andrei Pangin
c383a35ff4 Adjust limits for tests sensitive to CPU time 2026-03-27 14:38:10 +00:00
Diego Lovison
82ae80a660 doc: Improve readability of the jfrconv conversion table (#1711) 2026-03-26 13:19:44 +00:00
Andrei Pangin
7e92b5cdac Fix GHA test failures 2026-03-26 00:12:24 +00:00
Andrei Pangin
fe69e4fab2 An option to truncate deep stacks (#1706) 2026-03-25 23:51:55 +00:00
Andrei Pangin
d94581c24c Workaround for JFR shutdown race (#1707) 2026-03-25 22:51:24 +00:00
alevymyers
f3c31942fb Ensure remaining buffer is sufficient for event data in JfrReader (#1697) 2026-03-24 20:49:02 +00:00
Andrei Pangin
a246ced814 An option to limit size of the call trace storage (#1705) 2026-03-23 17:07:33 +00:00
Andrei Pangin
8d653dd5e0 Unify uses of gcc atomics (#1704) 2026-03-20 16:14:48 +00:00
Andrei Pangin
cc0eab1789 Speed-up stack walking by optimizing nmethod name comparison (#1701) 2026-03-18 18:05:58 +00:00
Andrei Pangin
842b612e08 Allow negative keys in JFR constant pool (#1699) 2026-03-17 15:01:00 +00:00
Andrei Pangin
ff4336d136 #1672: Flamegraph: use Ctrl+Click in addition to Alt+Click to remove stacks 2026-03-16 23:44:03 +00:00
Dan Lutker
e1dd4c05f6 Adding a multi-threaded test executor (#1688) 2026-03-10 22:40:54 +00:00
Dan Lutker
174dc31d88 Add workflow_dispatch for manual runs (#1693) 2026-02-26 18:07:10 +00:00
korniltsev-grafanista
dbd9fc7520 Fix parsing /proc/self/stat (#1690) 2026-02-20 02:10:55 +00:00
Andrei Pangin
dc69cf4b80 Unwind AArch64 generated stubs on JDK 26+ (#1684) 2026-02-11 16:03:40 +00:00
Andrei Pangin
abc8b7f493 #1686: Clarify table allocation logic in CallTraceStorage 2026-02-11 15:57:49 +00:00
Bara' Hasheesh
4ea8e5bbb6 Remove duplicate CodeCache lookup operation in walkVM (#1682) 2026-02-10 16:26:27 +00:00
Bara' Hasheesh
71ad47a46e More AGCT cleanup after removal of recovery tricks (#1683) 2026-02-09 13:46:57 +00:00
Andrei Pangin
0023021ddf #1675: Cleanup AGCT recovery tricks and remove safemode option 2026-02-08 01:38:34 +00:00
Andrei Pangin
444d0e6353 Suppress gcc warning 'parameter passing for argument changed' 2026-02-07 18:23:06 +00:00
Andrei Pangin
7e2ed0e77e #1677: Remove cstack=lbr option 2026-02-07 16:52:47 +00:00
Andrei Pangin
68244fbf6f #1678: Eliminate redundant listing of profiler arguments 2026-02-06 23:20:30 +00:00
Bara' Hasheesh
31042f13bc An option to select TLAB based AllocTracer engine with JDK 11+ (#1671) 2026-02-05 10:56:06 +00:00
Andrei Pangin
a3c6d92d39 Permanently remove check command (#1673) 2026-02-05 10:47:59 +00:00
Andrei Pangin
036c87e50d Differential Flame Graphs (#1553) 2026-02-05 00:00:48 +00:00
Andrei Pangin
15b1161f57 Move converter Main class to the one.convert package (#1670) 2026-02-04 01:13:21 +00:00
Andrei Pangin
b02434bd9d #1268: Documentation should refer to libasyncProfiler.dylib on macOS 2026-02-01 01:07:26 +00:00
Johannes Bechberger
9c293283f2 Mention DebugNonSafepoints flag in "Getting Started" (#1270)
Signed-off-by: Johannes Bechberger <johannes.bechberger@sap.com>
Co-authored-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2026-01-31 21:09:25 +00:00
Andrei Pangin
3aba5ee521 #1668: Document --include/--exclude as non-JFR options 2026-01-31 19:01:48 +00:00
Andrei Pangin
078935591f Removed extra spaces 2026-01-23 18:16:16 +00:00
Vishal Chand
dc88d3f756 #1585: Scale perf counter in case of multiplexing (#1631) 2026-01-23 18:13:03 +00:00
Andrei Pangin
a071e8a2f8 Specify JAR manifest in Maven build 2026-01-23 01:21:31 +00:00
Bara' Hasheesh
6e6acc1769 Fix recordCpuMultiEngine test failure when kernel profiling is restricted (#1664) 2026-01-21 16:28:16 +00:00
Andrei Pangin
709a777393 Updated links to v4.3 2026-01-21 00:00:58 +00:00
Francesco Andreuzzi
b9d6843ae5 Provide non-aggregated samples in OTLP converter (#1660) 2026-01-20 00:28:09 +00:00
Long Yang
3722d05ba0 #1528: Add a hard-coded limit on the maximum number of jmethodIDs (#1656) 2026-01-19 12:46:30 +00:00
Francesco Andreuzzi
872be63220 Fix VMStructs::initJvmFunctions call order (#1658) 2026-01-15 10:58:30 +00:00
Bara' Hasheesh
a89d7ddeba Prefer perf-events engine when record-cpu or target-cpu are selected (#1654) 2026-01-15 10:29:04 +00:00
Andrei Pangin
f789c4f748 CI: Fix job dependencies 2026-01-14 02:37:42 +00:00
Andrei Pangin
d43d328b58 Fix trailing spaces 2026-01-14 02:23:19 +00:00
Andrei Pangin
037c09906d Updated CHANGELOG 2026-01-14 02:14:27 +00:00
Andrei Pangin
f352361814 Swapped toolbar icon colors 2026-01-13 19:53:19 +00:00
Vishesh Ruparelia
19b22efeff Support include/exclude flag for JFR to pprof conversion (#1655) 2026-01-13 19:21:56 +00:00
Andrei Pangin
16fdebf78c #1140: Flamegraph hot keys should not trigger default browser action
Co-authored-by: Kerem Kat <keremkat@gmail.com>
2026-01-13 18:12:22 +00:00
Andrei Pangin
0b73f655a8 #1140: Flamegraph improvements: legend, hot keys, new toolbar icons
Co-authored-by: Kerem Kat <keremkat@gmail.com>
2026-01-13 18:01:59 +00:00
Andrei Pangin
cf4739a61b #1653: Disable currentTimeMillis test on JDK 11 2026-01-12 19:54:48 +00:00
Bara' Hasheesh
757bf8edd3 Disallow incorrect usage of jfrsync (#1647) 2026-01-12 19:10:21 +00:00
Vishesh Ruparelia
d0d16240d4 Support include/exclude flag for JFR to OTLP conversion (#1635) 2026-01-12 19:10:05 +00:00
Bara' Hasheesh
fc9b5c85cf Do not record cpu frame on non-perf samples (#1651) 2026-01-12 13:47:33 +00:00
Francesco Andreuzzi
436d5b5066 Fix stop sequence in Profiler::start (#1648) 2026-01-10 04:21:18 +03:00
Vishesh Ruparelia
4663784b98 Support include/exclude flag for JFR to heatmap conversion (#1633) 2026-01-08 16:51:09 +03:00
Francesco Andreuzzi
d2172a6382 Add missing stub for com.google.protobuf.ProtocolStringList (#1644) 2026-01-08 15:25:47 +03:00
Francesco Andreuzzi
93b6ae376d Address breaking changes in OTLP 1.9.0 (#1624) 2026-01-08 03:03:24 +03:00
Andrei Pangin
ee4ac6e888 Unwind vDSO correctly on Linux-ARM64 (#1641) 2026-01-08 02:19:52 +03:00
Francesco Andreuzzi
865e8b91f8 Add timeSpan to WallClockSample (#1640) 2026-01-08 01:55:47 +03:00
Bara' Hasheesh
2a4f329cba Fix issue where the jfrconv uses native lock samples for leak detection (#1630) 2025-12-23 12:24:33 +00:00
Andrei Pangin
9c425ca74f Harden crash protection in StackWalker (#1629) 2025-12-21 23:17:38 +00:00
Andrei Pangin
d871819848 Stylistic changes after #1628 2025-12-20 20:59:10 +00:00
Andrew Azores
bf84fadb3c Implement -javaagent premain/agentmain, register MXBean (#1628) 2025-12-20 20:36:06 +00:00
Bara' Hasheesh
fde780e275 Fix timeout & loop combined usage (#1625) 2025-12-19 11:11:34 +00:00
Francesco Andreuzzi
6e04336375 Filter JFR events by latency (#1620) 2025-12-16 20:10:06 +00:00
Bara' Hasheesh
a77d091e08 asprof stop fails when called after VM shutdown (#1623) 2025-12-16 10:46:56 +00:00
Bara' Hasheesh
999f0c7ae3 Prevent profiler from deadlocking the JVM death when using jfrsync (#1619) 2025-12-12 12:13:36 +00:00
Bara' Hasheesh
cdaf6e76ba Create a GraalVM filter for async-profiler tests (#1611) 2025-12-10 22:45:57 +00:00
Francesco Andreuzzi
3a493bedc4 Fix code cache memory leak in lock profiling while looping (#1575) 2025-12-08 17:36:06 +00:00
Bara' Hasheesh
7b24ad89b6 Sync jattach sources - avoid busy waiting for dead process (#1615) 2025-12-05 21:15:22 +00:00
Bara' Hasheesh
5bf0e311c2 asprof collect fails when the target process concurrently terminates (#1614) 2025-12-05 20:51:45 +00:00
Andrei Pangin
8772214f7e Updated links to v4.2.1 2025-11-22 03:14:22 +00:00
Jaromir Hamala
3bb1e72d09 Timezone switcher between Local and UTC time in heatmaps (#1530) 2025-11-20 01:01:24 +00:00
Kerem Kat
ea0b34b578 Use ref-cycles instead of cycles in PmuTests to fix flakiness (#1601) 2025-11-19 16:57:43 +00:00
Bara' Hasheesh
e92eb45812 Support running integration tests on a different JDK (#1602) 2025-11-19 16:36:15 +00:00
Bara' Hasheesh
d304fd5d75 Do not fail cacheMisses test if no samples are collected (#1600) 2025-11-19 13:07:23 +00:00
Andrei Pangin
61a676f87f Added missing docs on nativelock and trace options 2025-11-18 23:58:25 +00:00
Andrei Pangin
b855e0c2c4 Follow up: Workaround for the PERF_EVENT_IOC_REFRESH bug (#1599)
Signed-off-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2025-11-18 17:26:44 +00:00
Andrei Pangin
763616aa17 Workaround for PERF_EVENT_IOC_REFRESH bug (#1599) 2025-11-18 15:33:16 +00:00
Andrei Pangin
a25e5194bf Do not block any signals during execution of a custom crash handler (#1596) 2025-11-17 10:06:56 +00:00
Andrei Pangin
ff24f1220c Do not set [bug] label automatically 2025-11-14 22:05:01 +00:00
Kerem Kat
124eca439e Fix build with MERGE=false (#1594) 2025-11-14 20:34:16 +00:00
Bara' Hasheesh
4bcfe9ee7b Re-enable ComptaskTests for JDK25 (#1592) 2025-11-14 20:29:56 +00:00
Bara' Hasheesh
0b7ee6d830 Support compilation on modern JDKs. Drop JDK7 support (#1590) 2025-11-14 15:53:37 +00:00
Andrei Pangin
01325ea87c #1584: JfrReader loops on corrupted recordings 2025-11-07 21:36:38 +00:00
Bara' Hasheesh
78172b7cb0 Optimize make targets to not compile C/C++ files when not needed (#1581) 2025-11-06 13:10:00 +00:00
Soumadipta Roy
fd269e6450 Remove more redundant check declarations (#1579) 2025-11-05 13:04:15 +00:00
Soumadipta Roy
61d48a6b43 Remove redundant check declarations (#1577) 2025-11-05 11:56:59 +00:00
Soumadipta Roy
c6c2fc1497 Deprecate check command (#1574) 2025-11-04 23:12:16 +00:00
Bara' Hasheesh
585054661f [test] Change stub frame check to be more generic (#1576) 2025-11-03 17:49:12 +00:00
Soumadipta Roy
0cb40bee11 Reduce mutex and rdlock iterations for nativelock tests (#1571) 2025-10-30 14:44:38 +00:00
Francesco Andreuzzi
8c851ddad2 [GHA] Fix trailing whitespaces checker (#1572) 2025-10-30 12:03:25 +00:00
Francesco Andreuzzi
8fa4fd0b78 Expose async-profiler metrics (#1568) 2025-10-30 00:01:37 +00:00
Francesco Andreuzzi
9611d55567 [GHA] Add checker for trailing whitespace (#1569) 2025-10-29 11:17:05 +00:00
Francesco Andreuzzi
eb4d126a2d Force test runs to use correct async-profiler library (#1565) 2025-10-28 18:24:28 +00:00
Soumadipta Roy
85ae06b177 #1547: Implement native lock profiling (#1549) 2025-10-23 19:24:55 +01:00
Francesco Andreuzzi
872631f82b [GHA] Add more patterns to the EOF newline checker (#1561) 2025-10-23 15:29:19 +01:00
Francesco Andreuzzi
7482988021 [GHA] Add checker for EOF newline (#1560) 2025-10-23 13:39:36 +01:00
Andrei Pangin
e647076de5 Broken formatting in ProfilerOptions doc 2025-10-22 20:37:59 +01:00
Bara' Hasheesh
c478490ce9 Fix record-cpu bug when kernel stacks are not available (#1558) 2025-10-22 18:47:56 +01:00
Bara' Hasheesh
6e10742be1 Fix duplicate native stacks when -F mixed is used with fp/dwarf stack walker (#1546) 2025-10-22 00:59:58 +01:00
Adnan Khan
49e56704f9 ci: scope down GitHub Token permissions (#1556) 2025-10-22 00:46:57 +01:00
Andrei Pangin
79d9058b18 Parse FlameGraph title from HTML input (#1555) 2025-10-21 14:37:50 +01:00
Andrei Pangin
9674d20873 Converter API to parse jfr or collapsed to a FlameGraph object (#1551) 2025-10-21 14:27:23 +01:00
Andrei Pangin
538f3a2e48 Update links to v4.2 in the documentation (#1548) 2025-10-20 16:10:08 +01:00
Andrei Pangin
e35113a647 Added test for the native frame duplication issue (#1545) 2025-10-17 22:44:59 +01:00
Andrei Pangin
8f7e4e19cc Fix duplicated native stacks with perf_events (#1544) 2025-10-17 16:20:10 +01:00
Bara' Hasheesh
88b7ba3838 Add missing nativemem jfr setting (#1542) 2025-10-17 12:00:59 +01:00
Andrei Pangin
7a86354d77 Release 4.2 2025-10-15 22:11:49 +01:00
Andrei Pangin
fa5ada6747 Use VMStructs stack walking mode by default (#1539) 2025-10-15 22:00:25 +01:00
Francesco Andreuzzi
fc2a9b928c Latency Profiling enhancements (#1499) 2025-10-15 20:28:13 +01:00
Andrei Pangin
5aee9cdb03 Support advanced stack walking features with cstack=vm (#1537) 2025-10-15 20:25:19 +01:00
Francesco Andreuzzi
dd0d233499 Skip Instrument::stop in VMDeath callback (#1538) 2025-10-15 19:55:18 +01:00
Bara' Hasheesh
fb673227c7 GHA: replace macos-13 with macos-15-intel (#1527) 2025-10-09 02:47:08 +02:00
Lukas Bloder
1a15a0e86a Expose dictionary that maps event id to javaThreadId (#1526) 2025-10-07 10:47:11 +01:00
Andrei Pangin
ea095462ca Use JavaFrameAnchor to find top Java frame with cstack=vm (#1517) 2025-10-03 14:17:40 +01:00
Soumadipta Roy
3634cdc1ac Fix retryCount final issue (#1522) 2025-10-02 17:54:07 +01:00
Soumadipta Roy
3e663759da #1510: Add option to retry tests with parameterized retry count (#1520) 2025-10-02 17:41:06 +01:00
Bara' Hasheesh
97c35ac96c Fix matching pattern for sys_getdents (#1521) 2025-10-02 17:39:37 +01:00
Bara' Hasheesh
6453ccca43 Fix UnsatisfiedLinkError when tmpdir is set to a relative path (#1515) 2025-10-01 15:24:01 +01:00
Kerem Kat
f9b78102ce Add CPUTimeSample event support to jfrconv (#1475) 2025-09-30 12:56:30 +01:00
Kerem Kat
861f4f4f63 Add Liberica to the CI on Alpaquita musl (#1466) 2025-09-30 12:12:41 +01:00
Bara' Hasheesh
0eba17edd0 Add more GHA jobs to cover JDK versions on ARM (#1508) 2025-09-29 14:59:57 +01:00
Kerem Kat
bfa821b6ce Make workflow names shorter (#1514) 2025-09-26 23:36:50 +01:00
Kerem Kat
e3f646a1d9 Fix integration test dependencies to build and cosmetic changes (#1502) 2025-09-26 11:49:10 +01:00
Francesco Andreuzzi
7338c30d88 Clean up EventMask usages (#1507) 2025-09-26 11:36:15 +01:00
Andrei Pangin
d97a7d3343 Detect if calloc calls malloc (#1500)
Fixes `nativemem` profiling on Alpaquita Linux
2025-09-23 13:27:38 +01:00
Bara' Hasheesh
07b3e747d1 Eliminate period bias in CPU smoke test (#1465) 2025-09-17 18:30:56 +01:00
Andrei Pangin
70a13bcd03 asprof --latency option 2025-09-16 01:08:00 +01:00
Francesco Andreuzzi
6f2a9b80f8 Optimize method tracing when the function is not profiled (#1471) 2025-09-15 21:10:52 +01:00
Bara' Hasheesh
145fc2dd28 Fix processSamplingWithMemoryThreshold possible failure (#1494) 2025-09-12 16:26:08 +01:00
Bara' Hasheesh
6fc51db16e Fix comptask test failure (#1474) 2025-09-11 15:35:33 +01:00
Andrei Pangin
49ae9cfe7f Differentiate Java and non-Java threads using vtable (#1470) 2025-09-10 23:30:11 +01:00
Kerem Kat
fcf2734f56 Reconvert outdated flamegraph.html (#1472) 2025-09-10 19:03:05 +01:00
Andrei Pangin
2c188fe490 Fix compilation with JDK 8 headers 2025-09-09 12:51:49 +01:00
Rohitash Kumar
f6e850c5f5 Fix GetProcessIds_returns_valid_pids test (#1468) 2025-09-09 11:44:11 +01:00
Andrei Pangin
62307a2418 Fix compiler warning about sscanf 2025-09-07 20:46:29 +01:00
Rohitash Kumar
b30f5f1da1 System wide process sampling on Linux (#1411) 2025-09-06 15:11:17 +01:00
Bara' Hasheesh
af8fabe3db Unwind top frame on ARM using link register (#1463) 2025-09-06 03:08:59 +01:00
Francesco Andreuzzi
2e0e3ab792 Java method tracing + Latency profiling (#1421) 2025-09-06 01:06:09 +01:00
Francesco Andreuzzi
9f687fb07e Add -Wunused-variable compilation flag (#1462) 2025-09-03 21:09:23 +01:00
Francesco Andreuzzi
e052d51323 Profile:: _symbol_map is unused (#1456) 2025-08-29 22:28:30 +03:00
Francesco Andreuzzi
6ebadb87cf Thread name in OpenTelemetry output (#1448) 2025-08-29 00:12:11 +03:00
Andrei Pangin
5454c9bf7f Special handling of prologue and epilogue of compiled methods with cstack=vm (#1449) 2025-08-28 11:43:20 +03:00
Bara' Hasheesh
1eb40f446c [test] Compare ContendedLock duration against total thread blocked time (#1443) 2025-08-28 11:42:51 +03:00
Bara' Hasheesh
df063a6f6e Forbid setting begin and end to the same address (#1445) 2025-08-28 11:34:24 +03:00
Bara' Hasheesh
d651a7a326 Unwind dylib stubs as empty frames on macOS (#1450) 2025-08-27 02:52:11 +03:00
Andrei Pangin
512f7b88cf Allow cstack=vm for 32-bit ports 2025-08-24 01:15:13 +03:00
Adina-Andreea Zugravescu
ead97aca72 Fix sized string writer method in protobuf writer (#1447) 2025-08-21 19:14:21 +03:00
Andrei Pangin
6c61fb6c83 Remove harmful -momit-leaf-frame-pointer gcc flag (#1438) 2025-08-18 12:04:06 +01:00
Francesco Andreuzzi
02a8fdb9f5 Allow wildcards in Instrument profiling engine (#1435) 2025-08-14 14:47:26 +01:00
Bara' Hasheesh
0e551b0fef Two wall-clock profilers interfere with each other (#1417) 2025-08-13 15:37:03 +01:00
Francesco Andreuzzi
9b5e3f330a Smoke tests for JFR converter (#1434) 2025-08-13 13:55:00 +01:00
Francesco Andreuzzi
89ead820f0 Per-thread flamegraph option in JFR heatmap converter (#1414) 2025-08-13 13:42:38 +01:00
Francesco Andreuzzi
b320af7ad3 Preserve compatibility with old JFR reader (#1431) 2025-08-07 14:13:38 +01:00
Andrei Pangin
a9e8c8d558 Re-implement SafeAccess (#1427) 2025-08-06 20:07:03 +01:00
Andrei Pangin
ca58e81005 Fixed gcc warning on AArch64 2025-08-06 01:59:58 +01:00
Andrei Pangin
a035e3e4d1 Allow cross-compilation for 32-bit platforms 2025-08-06 01:26:32 +01:00
Bara' Hasheesh
f62a53ed3d More robust checks in parseMultiModeRecording test (#1428) 2025-08-04 18:49:24 +01:00
Bara' Hasheesh
8e64342485 Fix randomly failing tests (#1392) 2025-08-01 16:45:42 +01:00
Francesco Andreuzzi
b6d442b542 Skip hidden directories while looking for tests (#1426) 2025-07-31 15:32:26 +01:00
Francesco Andreuzzi
3237a0ce9b Extract JFR symbol lookup (#1378) 2025-07-31 00:46:54 +01:00
Francesco Andreuzzi
c612cd70e9 [GHA] Disable matrix fail-fast (#1296) 2025-07-30 21:19:35 +01:00
Francesco Andreuzzi
f461a06d23 Proactively check classpath in tests spawning a child process (#1422) 2025-07-30 12:07:46 +01:00
Adina-Andreea Zugravescu
8593be1600 Add time_nanos and duration_nanos to OTLP profiles with test (#1413) 2025-07-30 01:56:23 +01:00
Bara' Hasheesh
88c46da067 Refactor RegularPeak test to use javaagent (#1404) 2025-07-29 12:12:32 +01:00
Francesco Andreuzzi
fc6ffb3d6e Remove some clang-tidy checks (#1419) 2025-07-28 16:29:59 +01:00
Bara' Hasheesh
fd157a8a42 Add synthetic symbols for Mach-O stubs/trampolines (#1416) 2025-07-28 13:47:44 +01:00
Andrei Pangin
2a0bae6e06 #1395: VMStructs unwinding fails on JDK 26 2025-07-28 01:07:20 +01:00
Francesco Andreuzzi
49b7320521 Simplify heatmap JFR converter (#1388) 2025-07-25 13:49:34 +01:00
Andrei Pangin
cdedd3fb22 Change Maven Publisher from OSSRH to Central Portal 2025-07-21 22:18:20 +01:00
Andrei Pangin
da812fca7a Update links to v4.1 in the documentation 2025-07-21 14:30:12 +01:00
Andrei Pangin
5930966a92 Release 4.1 2025-07-21 02:35:48 +01:00
Andrei Pangin
7737df342d Updated CHANGELOG 2025-07-21 02:30:07 +01:00
Andrei Pangin
843f1d9f3e Unwind checksum and digest intrinsics on ARM64 (#1400) 2025-07-21 02:28:54 +01:00
Andrei Pangin
733f2a513c Rolled back invalid fragment from previous commit 2025-07-18 20:16:06 +01:00
Andrei Pangin
9824786981 #1389: Incorrect top frame for synchronous events with cstack=vm on ARM64 (#1399) 2025-07-18 16:30:13 +01:00
Soumadipta Roy
5fffdb1eaa Rewrite jfrconv executable to shell (#1366) 2025-07-17 15:59:20 +01:00
Francesco Andreuzzi
7bf8528f75 Separate workflow for automated clang-tidy review (#1384) 2025-07-16 21:45:52 +01:00
Vishesh Ruparelia
80ae8aed19 Improve stack walking termination logic (#1393) 2025-07-15 15:44:13 +01:00
Bara' Hasheesh
1c1a14c1ec Fix intermittent failures of JfrTests with live option (#1376) 2025-07-15 15:36:20 +01:00
Andrei Pangin
83e9bdd9bd Typo in docs 2025-07-14 18:18:26 +01:00
Bara' Hasheesh
22ce08f5ef #1380: Workaround clang type promotion bug (#1390) 2025-07-14 14:22:53 +01:00
Bara' Hasheesh
7c4385b0b1 JFR writer crashes when using cstack=vmx (#1387) 2025-07-11 13:11:09 +01:00
Bara' Hasheesh
461a3c1b93 Correctly check if profiler is preloaded (#1374) 2025-07-10 18:26:45 +01:00
Francesco Andreuzzi
5b178bfc5c Temporarily disable clang-tidy automatic comments (#1382) 2025-07-10 14:19:56 +01:00
Francesco Andreuzzi
520b897dce Create test/deps if it does not exist before running Makefile recipes (#1375) 2025-07-09 19:58:41 +01:00
Bara' Hasheesh
a70f25e00f Save all generated logs for debug purposes (#1373) 2025-07-09 18:44:37 +01:00
Francesco Andreuzzi
f79729167a Test OTLP output format (#1331) 2025-07-09 13:33:39 +01:00
Bara' Hasheesh
f627b3157b Give tests unique suffix names (#1371) 2025-07-08 17:35:53 +01:00
Francesco Andreuzzi
85fefd2800 Publish clang-tidy comments only for non-draft PRs (#1367) 2025-07-07 19:11:45 +01:00
Francesco Andreuzzi
5091304efd Ensure that only files under src/ are checked in cpp-lint-diff (#1365) 2025-07-07 14:12:13 +01:00
Francesco Andreuzzi
c42bf7ad9d Cancel redundant in-progress GHA runs (#1363) 2025-07-07 11:50:25 +01:00
Francesco Andreuzzi
2b8dffff27 JFR to OTLP converter (#1336) 2025-07-04 19:43:22 +01:00
Francesco Andreuzzi
09ad6c1663 Auto-generated clang-tidy review comments (#1360) 2025-07-04 14:51:48 +01:00
Andrei Pangin
40fd71a8a0 #1358: Do not dereference jmethodIDs on JDK 26 (#1362) 2025-07-04 14:03:10 +01:00
Andrei Pangin
557f4adecb Fix nonjava test failure on Alpine 2025-07-03 20:13:37 +01:00
Andrei Pangin
de54c536dc Do not include excess files in test.jar 2025-07-03 19:36:36 +01:00
Andrei Pangin
c74107e53f Suppress javac warnings when compiling tests 2025-07-03 19:08:00 +01:00
Francesco Andreuzzi
b3968f5e38 Simplify location handling in OTLP (#1361) 2025-07-02 20:45:14 +01:00
Bara' Hasheesh
29dd537907 Correctly unwind stack for malloc events in VM stack walking mode (#1357) 2025-07-01 19:41:54 +01:00
Bara' Hasheesh
0330a6e333 Allow cstack=vmx for native applications (#1354) 2025-07-01 15:29:22 +01:00
Francesco Andreuzzi
9b44c2e99d C++ linting via clang-tidy (#1338) 2025-07-01 12:57:45 +01:00
Kerem Kat
5b4450b85c Fix invalid alignment in mallocTracer and zero-init buf in getTotalCpuTime (#1351) 2025-06-26 16:57:26 +01:00
Andrei Pangin
82d13772a5 Disable JFR OldObjectSample event in jfrsync mode (#1350) 2025-06-25 23:03:03 +01:00
Andrei Pangin
bbca9f1817 [test] Avoid listing files in /tmp 2025-06-18 12:20:48 +01:00
Bara' Hasheesh
981619680e Change stackwalker test checks to be more restrictive (#1341) 2025-06-13 17:12:42 +01:00
Francesco Andreuzzi
2b556680dc Use Index in jfrMetadata (#1337) 2025-06-13 12:47:00 +01:00
Francesco Andreuzzi
b3f58429f5 Support for OTLP Profile signals (#1188) 2025-06-12 01:11:46 +01:00
Andrei Pangin
2844e6c5c1 #1327: Merged jattach memory leak fixes
Co-authored-by: tteokbokki-master <0jin.git@gmail.com>
2025-06-09 14:34:06 +01:00
Andrei Pangin
0e1008531b Fixed misc compilation and test failures 2025-06-05 19:57:35 +01:00
Bara' Hasheesh
19ad42cd23 Enable native memory profiling of async-profiler itself (#1323) 2025-06-03 16:31:02 +01:00
Bara' Hasheesh
f76833a2c0 Add integration test for VM/VMX stack walkers for incomplete frame edge cases (#1321) 2025-06-02 14:04:17 +01:00
Andrei Pangin
4b1df29aab #1319: Accessing osThreadId of a terminating thread may fail (#1324) 2025-06-02 01:43:51 +01:00
Bara' Hasheesh
795da942f7 Enable unit tests related to symbol parsing on macOS (#1315) 2025-06-01 18:18:42 +01:00
Bara' Hasheesh
bedffcb080 Updated tests to verify symbol patching on macOS (#1279) 2025-05-25 13:19:36 +01:00
Andrei Pangin
660ffcd5c6 #1193: Parse non-lazy symbol pointers on macOS
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2025-05-25 12:54:26 +01:00
Bara' Hasheesh
60e79e364a Prevent from exceeding MAX_NATIVE_LIBS limit (#1312) 2025-05-23 15:41:00 +01:00
Andrei Pangin
d89ab7a16c Skip last 10% allocations for leak detection (#1299) 2025-05-21 13:28:48 +01:00
Francesco Andreuzzi
d042e0a8db Fix comptask test flakyness on JDK8 (#1307) 2025-05-21 10:34:24 +01:00
Andrei Pangin
3256fde4c1 Do not count tests that are not in the include list 2025-05-21 01:15:44 +01:00
Francesco Andreuzzi
3bbab49e3c Add corretto-8 to test matrix (#1274) 2025-05-20 15:38:37 +01:00
Francesco Andreuzzi
ed57317281 Test comptask feature (#1293) 2025-05-20 15:34:03 +01:00
Francesco Andreuzzi
c17de4c220 Guard hook installation with dlopen/dlclose (#1264) 2025-05-20 02:39:43 +01:00
Andrei Pangin
3a9252c677 Allow profiling kprobes/uprobes with --fdtransfer (#1300) 2025-05-19 19:03:21 +01:00
Francesco Andreuzzi
fd8ba8b9ee Fix typo in JfrTests (#1303) 2025-05-19 19:02:45 +01:00
Andrei Pangin
5fe1c47ee3 Removed unused imports 2025-05-18 01:36:21 +01:00
Andrei Pangin
ff203f391a Make sure siginfo is passed to custom SEGV handler (#1298) 2025-05-18 00:41:32 +01:00
Francesco Andreuzzi
ba93f813a9 Protobuf writer (#1292) 2025-05-15 16:12:30 +01:00
JugadK
7c3aa59ceb Record which CPU a sample was taken on (#1286) 2025-05-13 15:30:29 +01:00
Vishesh Ruparelia
25ddfe056b Support "--all" profiling mode (#1281) 2025-05-13 13:41:24 +01:00
Andrei Pangin
7d4157b1c5 Moved and updated policy files 2025-05-07 03:16:58 +01:00
Kerem Kat
b3907b43ed Include debug symbols in the release for the lib (#1271) 2025-05-06 22:24:42 +01:00
Kerem Kat
cf39c3ad9c Remove flaky RaceToLocks test (#1282) 2025-05-06 21:33:13 +01:00
Francesco Andreuzzi
73f0486946 Redirect only stderr from taskset in integration tests (#1276) 2025-05-01 20:01:11 +01:00
Francesco Andreuzzi
f2197dc400 Build and run tests on amazonlinux:2023 and amazonlinux:2 (#1246) 2025-05-01 12:53:45 +01:00
Bara' Hasheesh
8c15cbac99 Fix memory hook installation (#1269) 2025-04-29 17:18:31 +01:00
Francesco Andreuzzi
d2c85c18c6 Build and run tests on Alpine (#1226) 2025-04-28 19:49:49 +01:00
Andrei Pangin
b5b41dcaaa Fix perfEvents tests on systems with restricted perf_event_paranoid 2025-04-28 19:40:10 +01:00
Francesco Andreuzzi
f5fd5b0863 Fix NativememTests#dlopenCustomLib on Alpine (#1254) 2025-04-28 15:55:25 +01:00
Kerem Kat
dbcd94fcd6 Add test name filtering to the java test runner (#1129) 2025-04-28 13:49:39 +01:00
Bara' Hasheesh
570ead9c13 Include asprof.h in async-profiler release (#1262) 2025-04-25 21:52:11 +01:00
Francesco Andreuzzi
471267bea4 Fix nightly publication (#1267) 2025-04-25 16:51:59 +01:00
Francesco Andreuzzi
67076816c1 Separate jobs for build/unit tests and integration tests (#1253) 2025-04-25 16:19:43 +01:00
Francesco Andreuzzi
0c72a8d3e9 Amend amazonlinux:2 image (#1265) 2025-04-25 14:26:08 +01:00
Kerem Kat
fdaf1957c0 Detect and parse the loader in parseLibraries. (#1263) 2025-04-24 19:48:43 +01:00
Francesco Andreuzzi
047a6dea1f Add amazonlinux:2 Docker image with Node.js (#1255) 2025-04-24 17:10:31 +01:00
Francesco Andreuzzi
14c7e819b2 Preferably patch non-dotted symbols when more than one matches the prefix (#1251) 2025-04-22 17:22:36 +01:00
Kerem Kat
fa417c85c8 Add nativemem test with dlopen after profiler start. (#1243) 2025-04-19 01:47:16 +01:00
Francesco Andreuzzi
6d786b7401 Skip compiler optimizations for dummy malloc hooks (#1242) 2025-04-18 12:04:37 +01:00
Francesco Andreuzzi
387dee13b8 Fix musl-specific problems (#1235) 2025-04-17 13:54:31 +01:00
Ariel Ben-Yehuda
164eac4dbd add support for user JFR events (#1223) 2025-04-15 15:12:28 +01:00
Johannes Bechberger
7207fc8775 Replace flamegraph with html in ConverterUsage.md (#1231) 2025-04-15 14:30:14 +01:00
Bara' Hasheesh
b034e4c314 #1174: Detect JVM in non-Java application and attach to it (#1192)
Co-authored-by: Andrei Pangin <noreply@pangin.pro>
2025-04-14 02:16:12 +01:00
Andrei Pangin
39f43006a1 Removed obsolete Makefile target 2025-04-13 21:47:55 +01:00
Andrei Pangin
b9f3456f89 Support PAC on linux-arm64 2025-04-13 19:38:28 +01:00
Andrei Pangin
5ce2c34d3e Fixed unwinding of primordial stack on ARM64 2025-04-13 19:38:27 +01:00
Francesco Andreuzzi
e359d161ba Separate CI jobs for macOS arm64 and x64 (#1212) 2025-04-10 11:38:54 +01:00
Andrei Pangin
7b2d1d9c94 #1222: Update VMStructs for JDK 25 (mutable nmethod data out of CodeCache) 2025-04-10 02:32:47 +01:00
Andrei Pangin
5030fe5faa Minor documentation edits 2025-04-09 02:22:14 +01:00
Andrei Pangin
bc80518125 Update links to v4.0 in the documentation 2025-04-08 18:36:22 +01:00
Andrei Pangin
87b7b42ec6 Release 4.0 2025-04-08 15:55:28 +01:00
Andrei Pangin
80a6e722b6 Bump macos-version-min to 10.15 2025-04-08 15:51:43 +01:00
Andrei Pangin
e66a4ab6d6 Skip patching calloc() on musl to prevent double-accounting 2025-04-08 15:51:22 +01:00
Andrei Pangin
f071146d11 Revert "#1162: Avoid calling JNI GetEnv in a signal handler"
This reverts commit 94a6f9ca61.
2025-04-08 15:25:31 +01:00
Andrei Pangin
ea969d10f9 Updated CHANGELOG 2025-04-08 13:29:34 +01:00
Andrei Pangin
85c72f839f Updated CHANGELOG 2025-04-08 13:22:44 +01:00
Andrei Pangin
6979a9eff2 Heatmap doc 2025-04-08 04:37:23 +01:00
Andrei Pangin
ebd889466b #1125: Use dlopen instead of dl_iterate_phdr for parsing libraries (#1220)
Co-authored-by: Kerem Kat <keremkat@gmail.com>
2025-04-08 01:27:59 +01:00
Kerem Kat
2dba71fcf9 Add event type details to doc. (#1108) 2025-04-07 01:55:00 +01:00
Andrei Pangin
94a6f9ca61 #1162: Avoid calling JNI GetEnv in a signal handler 2025-04-06 22:47:57 +01:00
Soumadipta Roy
87417ec418 Ignore raw call trace id when it is greater than total capacity of table in callTraceStorage. (#1207)
Signed-off-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
Co-authored-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2025-04-05 02:16:54 +01:00
Kerem Kat
41e205b2b2 Add test for so mapped to same offset twice (#1218) 2025-04-05 01:37:34 +01:00
Kerem Kat
6178c17d92 Add test for having different offset and virtaddr for LOAD segment (#1216) 2025-04-05 01:20:22 +01:00
Kerem Kat
177de58865 Always sort sources so zInit is last. (#1219) 2025-04-05 01:06:39 +01:00
Andrei Pangin
49cca40e47 #1176: Calculate .so image_base from LOAD Program Header 2025-04-04 02:42:38 +01:00
Andrei Pangin
ef64ae2f2d #1175: Correct image_base calculation if the same .so is mapped twice at 0 offset 2025-04-04 02:39:33 +01:00
Francesco Andreuzzi
5a853fa26b Amend LockTests#raceToLocks to test profiling results of contended locks (#1204) 2025-04-01 10:10:42 +01:00
Francesco Andreuzzi
ee75d80622 Use pointers to represent children in Trie nodes (#1197) 2025-03-28 16:04:50 +00:00
Bara' Hasheesh
18e3b39db9 Enable CTests on MacOs (#1198) 2025-03-28 16:04:17 +00:00
Francesco Andreuzzi
24007027b3 remove flaky check (#1200) 2025-03-28 15:57:46 +00:00
Kerem Kat
be1380022d test with Corretto 24 instead of 23, now that it is out (#1190) 2025-03-24 10:58:39 +00:00
Andrei Pangin
6761587bb3 #1182: Retain by-thread grouping when reversing FlameGraph (#1189) 2025-03-23 01:24:06 +00:00
Francesco Andreuzzi
788e44dffc Fix compilation with source merging disabled (#1185) 2025-03-21 12:19:02 +00:00
Ariel Ben-Yehuda
7152ba0477 Add asprof_get_thread_local_data (#1169) 2025-03-20 15:20:21 +00:00
Francesco Andreuzzi
eabbd2f796 Fix typo in --inverted docs (#1184) 2025-03-20 10:52:46 +00:00
Francesco Andreuzzi
a78793bed0 An option to sample threads on the selected CPU only (#1180) 2025-03-19 21:19:07 +00:00
Volker Simonis
fe1bc66d4b Implement new '--inverted' option to flip graphs vertically (#1178) 2025-03-16 13:34:11 +00:00
Andrei Pangin
21707d4d7e JDK 25 compatibility 2025-03-16 00:43:50 +00:00
Vishesh Ruparelia
b0dde79fc0 Log when no samples are collected (#1167) 2025-03-14 21:42:46 +00:00
Soumadipta Roy
5ea64a15c3 Add functionality to get unique stacktraces count from JFR output (#1172) 2025-03-11 16:50:14 +00:00
Volker Simonis
1d64e18123 Mention libstdc++-static as a build requirement in Readme.md (#1166)
Signed-off-by: Volker Simonis <volker.simonis@gmail.com>
2025-03-10 14:21:52 +00:00
olivergillespie
f5a7c63d0b Use PERF_FLAG_FD_CLOEXEC with perf_event_open (#1165) 2025-03-10 10:32:12 +00:00
Andrei Pangin
626366b5ad #1161: Fixed itimerTotal test on macOS 2025-03-08 14:35:55 +00:00
Andrei Pangin
21bc5948cf Fixed nativemem tests on CentOS 7 2025-03-08 04:08:11 +00:00
Vishesh Ruparelia
15609cdfa0 Fixed flaky lock test (#1160) 2025-03-07 12:37:10 +00:00
Vishesh Ruparelia
e27198e324 CI/CD: use runson.display for publishing test artifacts instead of runson.name (#1157) 2025-03-03 15:56:09 +00:00
Vishesh Ruparelia
ceb1a3126f Use GitHub hosted runners instead of CodeBuild runners for CI/CD (#1154) 2025-03-01 23:28:24 +00:00
Andrei Pangin
cc76f05e60 #1143: Fix bug in ThreadFilter because of negative thread_id on macOS 2025-02-23 22:20:59 +00:00
Volker Simonis
e6e0494926 Fix 'make test' (#1145) 2025-02-20 20:19:17 +00:00
Ariel Ben-Yehuda
f71c31af7b Add code for getting timestamp from mrs (#1142) 2025-02-18 14:52:47 +00:00
Ariel Ben-Yehuda
2d764ccb63 Support clock=tsc without a JVM (#1123) 2025-02-18 12:26:44 +00:00
Ariel Ben-Yehuda
75aefa36c9 jfr: set the clock setting to the actual rather than attempted clock (#1141) 2025-02-17 18:59:16 +00:00
Andrei Pangin
c1ed9b3169 Redo fix for dlopen recursion on musl 2025-02-14 23:49:39 +00:00
Andrei Pangin
cfb9fa45c3 Fixed compilation failure with gcc12 (#1137) 2025-02-14 22:37:50 +00:00
Andrei Pangin
e6407ee349 Move dlopen(self) out of constructor because of a recursion on Alpine 2025-02-14 22:07:21 +00:00
Kerem Kat
3beae04e9d Support posix_memalign and aligned_alloc in the native memory tracker (#1130) 2025-02-13 19:17:52 +00:00
Andrei Pangin
bd439d8a04 #1132: Apply LD_PRELOAD to the test binary, not shell 2025-02-12 03:22:26 +00:00
Kerem Kat
cede318ece Patch both rela.plt and rela.dyn for hooks (#1128) 2025-02-10 13:20:13 +00:00
Andrei Pangin
ff21b118b2 LD_PRELOAD nativemem test (#1126) 2025-02-06 11:20:57 +00:00
Andrei Pangin
3d950aed89 Fixed 'Zero key not allowed' exception with non-Java profiles 2025-02-05 15:56:39 +00:00
Kerem Kat
fc24d60d1b Implement nofree, which does not track frees in nativemem. (#1119) 2025-02-05 12:20:59 +00:00
Long Yang
518a7528b4 Fix typo in the doc (#1124) 2025-02-05 10:16:13 +00:00
Kerem Kat
d1498a6c7f Keep CC/CXX env changes without breaking CROSS_COMPILE (#1099) 2025-01-16 21:14:48 +03:00
Andrei Pangin
10fa9ee313 Clear ThreadCpuTime buffer when Wall Clock profiling restarts 2025-01-14 03:00:33 +00:00
Andrei Pangin
29ee888a42 #1101: Fix "Unsupported JVM" on OpenJ9 JDK 21 2025-01-12 05:16:47 +00:00
Andrei Pangin
6c0aff487b Increase number of reserved frames 2025-01-08 19:32:50 +00:00
Andrei Pangin
b55cb7c973 Use relative URLs in docs where possible 2025-01-08 19:32:17 +00:00
Andrei Pangin
77a5339b2c Redundant "Profiling started" message when running "asprof -d" 2025-01-06 01:45:07 +00:00
Andrei Pangin
a60f310dae Update CHANGELOG 2025-01-04 03:28:55 +00:00
Andrei Pangin
e98e76fbba #1098: Fixed wrong check in getBreakpoint() 2025-01-04 03:16:48 +00:00
Andrei Pangin
944dd405d9 Documentation edits 2025-01-04 00:47:02 +00:00
Andrei Pangin
0e3a7d7b7a Documentation edits 2025-01-04 00:44:20 +00:00
Andrei Pangin
fa937dbbc3 Documentation edits 2025-01-03 02:50:54 +00:00
Andrei Pangin
1b7bb4adca Fixed clang warnings 2025-01-03 01:13:28 +00:00
Andrei Pangin
21522393a2 #1097: FlameGraph: navigation through search results
Co-authored-by: Daryl Tan <3646725+openorclose@users.noreply.github.com>
2025-01-02 03:25:55 +00:00
Andrei Pangin
f334c51b7e Support cstack=vm/vmx on macOS 2025-01-01 00:40:14 +00:00
Andrei Pangin
87c5436731 Better allocation stack traces on non-HotSpot JVMs 2024-12-31 19:16:55 +00:00
Andrei Pangin
9afed85559 Suppress dynamic attach warning 2024-12-31 19:16:50 +00:00
Andrei Pangin
7691403e76 #1095: jfr print fails when a recording has empty pools 2024-12-30 19:50:24 +00:00
Artyom Drozdov
977a2f446b Heatmap: fixed races between iterative rendering and user interaction (#1094) 2024-12-30 18:41:03 +00:00
Artyom Drozdov
5a3636d5df Reduce max canvas size (#1093) 2024-12-30 14:45:40 +00:00
Andrei Pangin
75c71bfbd9 Limited support for nativemem profiling on macOS 2024-12-30 02:38:31 +00:00
Artyom Drozdov
ec83ae6d7a Heatmap converter (#944)
Signed-off-by: Artyom Drozdov <artyomd@lightrun.com>
Co-authored-by: Andrei Pangin <noreply@pangin.pro>
2024-12-29 17:53:49 +00:00
Andrei Pangin
37829cc30c Refactor JfrConverter for better extensibility 2024-12-27 19:46:59 +00:00
Andrei Pangin
2feb6f6e0e Refactoring and optimization of nativemem converter 2024-12-26 22:39:46 +00:00
Andrei Pangin
9cf87d8834 Minor doc rewording 2024-12-26 22:33:49 +00:00
Andrei Pangin
a2b7eecfeb Fixed TTSP test 2024-12-26 22:29:47 +00:00
Long Yang
3044fb1931 #1089: Fix stack trace memory pollution 2024-12-22 21:32:34 +00:00
Andrei Pangin
8da7831bb1 #1084: Reimplement logging without stdio 2024-12-19 03:10:05 +00:00
Andrei Pangin
217d25ab9c #1084: Fixed Log races 2024-12-18 19:43:24 +00:00
Andrei Pangin
7e6ceb7d5b #1084: Updated documentation about perf_event_mlock_kb 2024-12-18 18:04:23 +00:00
Andrei Pangin
c94fb08e6c nativemem fixes 2024-12-18 13:45:13 +00:00
Andrei Pangin
c578c17527 Fixed DWARF unwinding of nativemem stacks on ARM64 2024-12-18 03:37:20 +00:00
Aleksey Shipilëv
49aadce877 Cross-compilation does not work after CC/CXX changes (#1082) 2024-12-16 21:07:38 +00:00
Kerem Kat
78f78cf681 Native memory profiler (#1064) 2024-12-16 13:58:59 +00:00
千夜
5ebc82dd04 Fix JfrVisualization URL (#1079) 2024-12-11 15:49:34 +00:00
Andrei Pangin
5054a6b601 Fixed compiler warning about sprintf 2024-12-11 01:39:44 +00:00
Andrei Pangin
f0ceda6356 Unwind String.indexOf intrinsic on AArch64. Fix TTSP test. 2024-12-09 02:26:58 +00:00
Andrei Pangin
16f15de48e Rewrote time-to-safepoint test 2024-12-09 01:29:48 +00:00
Daryl Tan
d7ab6428e9 Add --nostop option that continues profiling even when using --ttsp or --begin/--end (#1046) 2024-12-09 01:27:18 +00:00
Andrei Pangin
b7d66a5223 Fixed comparison of NULL strings in C++ unit tests 2024-12-07 03:15:14 +00:00
Kerem Kat
7e5706121d Parse both .rela.dyn and .rela.plt of libraries. (#1074) 2024-12-07 01:21:42 +00:00
Kerem Kat
5fe149a639 Add markdown formatter (#1071) 2024-12-07 00:37:33 +00:00
Andrei Pangin
9c37bb0f62 Doc formatting 2024-12-06 23:06:32 +00:00
Andrei Pangin
04cc4759d4 Minor formatting; removed whitespace at EOL 2024-12-06 22:58:14 +00:00
Ariel Ben-Yehuda
bce178e3a5 add demangling of V0 Rust symbols (#1070) 2024-12-06 20:24:27 +00:00
Andrei Pangin
99df42dcbe Fixed kernel tests when perf_events are restricted 2024-12-05 19:36:26 +00:00
Andrei Pangin
3ccae7c9b5 Productize VMStructs-based stack walker (#1073) 2024-12-05 18:23:47 +00:00
Kerem Kat
34cee4b4fd Fix CHECK_OP string assertions. (#1072)
In addition to const char*, accept char* as string as well.
2024-12-05 15:30:22 +00:00
Andrei Pangin
f8e887eb9d Revised fix for JfrTest.regularPeak test 2024-12-02 23:41:06 +00:00
Andrei Pangin
deb1880b5d Fixed JFR test on macOS 2024-11-30 21:57:27 +00:00
Andrei Pangin
6993349079 Count missed samples when estimating total CPU time in ctimer mode (#1068) 2024-11-30 15:09:26 +00:00
Soumadipta Roy
fc440b0db9 Fix misses and build issues in Non Java profiling example in documentation (#1066) 2024-11-28 23:29:28 +00:00
Soumadipta Roy
25abe28782 Rectify flamegraph colors guide positioning (#1062) 2024-11-25 19:16:23 +00:00
Soumadipta Roy
fcdc5bd200 Rectify colored text for github markdown (#1061) 2024-11-25 18:48:53 +00:00
Andrei Pangin
b174febb3e Update README 2024-11-25 02:50:15 +00:00
Soumadipta Roy
ad141d1c15 Restructure and update documentation (#1029) 2024-11-24 22:37:42 +00:00
Daryl Tan
389a710276 Update build status badge in README.md to new workflow (#1059)
Signed-off-by: Daryl Tan <3646725+openorclose@users.noreply.github.com>
2024-11-22 16:28:05 +00:00
Kerem Kat
6682ad6fb0 Test corretto 11, 17, 21, 23 in CI (#1058) 2024-11-21 18:31:07 +00:00
Kerem Kat
3cf733d589 Unit test fw from scratch and PerfEventType::forName tests (#1036) 2024-11-18 13:11:21 +00:00
David Alvarez
603db84cde Fix errors in LockTests.datagramSocketLock (#1053) 2024-11-13 15:29:46 +00:00
Kerem Kat
f863502c8e Initialize CodeCache to NULL (#1051) 2024-11-11 11:29:06 +00:00
Daryl Tan
083d85f80a Upload hs_err logs to artifacts (#1050) 2024-11-08 14:52:41 +00:00
Kerem Kat
e6a319e1b6 Increase setup_lib_path buf size to PATH_MAX (#1049) 2024-11-08 14:24:33 +00:00
Andrei Pangin
36168a1f24 #1041: Adjust SP of a caller frame obtained with __builtin_frame_address 2024-11-07 00:28:29 +00:00
Andrei Pangin
0cd9726a81 #1044: Automatically switch on --all-user for non-CPU events if kernel profiling is unavailable 2024-11-07 00:02:20 +00:00
Daryl Tan
6f2c6cc666 Nightly builds enhancement (#1047) 2024-11-06 16:33:47 +00:00
Andrei Pangin
6c32ce9701 #1044: Fall back to ctimer for CPU profiling when perf_events are unavailable 2024-11-04 03:14:09 +00:00
Andrei Pangin
adecac7907 Print exceptions in test output; measure test run time in seconds 2024-11-03 01:49:06 +00:00
Andrei Pangin
870833bcc7 #1007: Use ExecutionSample event for CPU profiling and WallClockSample for Wall clock profiling 2024-11-02 15:47:56 +00:00
Kerem Kat
116504c9f7 Initialize rem_* in DwarfParser.parseInstructions (#1039) 2024-10-26 03:57:21 +01:00
Andrei Pangin
f59894d912 EventAggregator helper method 2024-10-25 22:30:21 +01:00
Andrei Pangin
0432101955 #1040: Removed the link to outdated wiki
Signed-off-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2024-10-24 17:24:09 +01:00
Kerem Kat
08f29f82ff Fix possible uninitialized variable access, method_class (#1034) 2024-10-22 15:13:58 +01:00
Andrei Pangin
6bd9dfaf8e Merged Dockerfiles 2024-10-20 01:12:45 +01:00
Daryl Tan
ecd8f3ac96 Refactor nightly builds to delete previous release and create a new release (#1032) 2024-10-18 19:57:29 +01:00
Kerem Kat
59d2defaa6 Next generation test runner (#1023) 2024-10-18 14:43:42 +01:00
Kerem Kat
62dca46d20 Simplify java assertions and add messages to all (#1027) 2024-10-17 11:25:57 +01:00
Soumadipta Roy
5a90a82231 Add GHA to validate required license headers (#1016) 2024-10-16 16:44:07 +01:00
Kerem Kat
da3f5f329c Reset CallTraceStorage counters before reporting live objects (#1009) 2024-10-16 16:18:22 +01:00
Daryl Tan
63009d09b6 Update paths to codebuild runners (#1028) 2024-10-16 12:23:14 +01:00
Daryl Tan
8c28cd8731 Implement builds on each commit using Github Actions (#998) 2024-10-16 12:00:28 +01:00
Daryl Tan
80b59a0101 Update tests to pass more reliably (#1022) 2024-10-14 13:14:25 +01:00
Andrei Pangin
74ffc675be Fix flaky tests 2024-10-14 12:34:15 +01:00
Kerem Kat
4ee2a5df39 Log test args and inputs. (#1021) 2024-10-11 14:50:27 +01:00
Andrei Pangin
445bafb861 Do not fail sudo tests if running under root 2024-10-11 00:10:16 +01:00
Andrei Pangin
3992d6a25d Fixed crashes on Alpine when profiling native apps 2024-10-10 23:35:48 +01:00
Andrei Pangin
4208d5c31c Minor style/formatting changes 2024-10-10 21:02:56 +01:00
Kerem Kat
e0885e348e Assert on total wait time in lock contention tests (#1017) 2024-10-10 20:11:34 +01:00
Andrei Pangin
061f03d79f An option to coarsen flame graphs (#1018) 2024-10-10 15:21:42 +01:00
Andrei Pangin
37ff942ae6 EventAggregator collects both samples and values 2024-10-09 22:08:36 +01:00
Daryl Tan
1291692c02 Obtain can_generate_sampled_object_alloc_events JVMTI capability only when needed (#1011) 2024-10-09 15:34:14 +01:00
Andrei Pangin
2d043110ab Fixed warnings with -Xcheck:jni 2024-10-01 00:23:23 +03:00
Andrei Pangin
1ed9df96d9 Intercept java.util.concurrent locks more efficiently (#1013) 2024-10-01 00:21:50 +03:00
Andrei Pangin
9db26318de Fixed test compatibility issues 2024-09-30 02:03:08 +03:00
Andrei Pangin
f43a6f8e1c Lock sampling fixes:
- reset total_duration on restart;
- update duration counter only for eligible locks;
- use u64 for timestamps;
- deallocate JNI function table.
2024-09-28 23:22:15 +03:00
Andrei Pangin
f53bfd4a58 #1007: Optimize wall clock profiling 2024-09-26 18:44:14 +01:00
Andrei Pangin
a386afa3d8 #1003: Support both tracefs and debugfs for kernel tracepoints 2024-09-25 23:34:43 +01:00
Andrei Pangin
e50446081a #983: Added support for jfr->collapsed conversion 2024-09-25 23:12:50 +01:00
Andrei Pangin
5a093b64fc Fixed test compilation warnings 2024-09-25 23:09:20 +01:00
Soumadipta Roy
82ebedad09 Add missing copyright header. (#1010) 2024-09-25 16:36:21 +01:00
Soumadipta Roy
d66717cbcf Add new test to validate JFR output with JDK JFR parser. (#1005) 2024-09-25 16:04:50 +01:00
Andrei Pangin
2833cabe67 Whitespace at EOL 2024-09-25 01:29:23 +01:00
Daryl Tan
0ed06fa7f1 Tweak tests so that they pass more reliably (#1006) 2024-09-24 20:07:08 +01:00
Aleksey Shipilëv
f66056a2c5 Make profiler version and arch tag configurable (#1004) 2024-09-20 13:34:50 +01:00
Andrei Pangin
69c9ddacdd An option to display instruction addresses (#1002) 2024-09-19 13:29:11 +01:00
Kerem Kat
6d7f73aec1 Profiling output respects loglevel (#986) 2024-09-17 01:01:40 +01:00
Andrei Pangin
c299592f15 #993: Filter native frames in allocation profile: OpenJ9 and stripped symbols 2024-09-15 02:30:12 +01:00
Johannes Bechberger
e6a1581ca3 Fix KernelTests#notLinux test (#997) 2024-09-13 18:39:49 +01:00
Andrei Pangin
69e04795b9 #993: Filter native frames in allocation profile 2024-09-13 02:32:26 +01:00
Soumadipta Roy
c122fde475 Enable jfr tests (#992) 2024-09-12 17:00:19 +01:00
Kerem Kat
25fa02e103 Sample contended locks by overflowing interval bucket (#982) 2024-09-09 14:30:44 +01:00
Kerem Kat
b9a3737531 Avoid JVM crash by deleting JNI refs after calling GetMethodDeclaringClass (#981)
This is a partial fix, complementing JDK-8268364.
2024-09-04 18:00:12 +01:00
Soumadipta Roy
d92b893826 Add flamegraph to collapsed conversion in test framework (#976) 2024-08-23 14:38:15 +01:00
Alih789
b8025a2449 Make JfrClass field method public so that G1HeapSummary class has access to determine difference in fields based on jdk version. (#971) 2024-08-14 22:58:22 +01:00
Andrei Pangin
02670ccde5 Fixed out-of-bounds array access in getDebuginfodCache() 2024-08-14 19:13:35 +01:00
Andrei Pangin
76311145c8 #843: Fix race between parsing and concurrent unloading of shared libraries 2024-08-12 02:01:47 +01:00
Andrei Pangin
179b0b1285 Minor stylistic changes 2024-08-07 07:49:14 +01:00
Kerem Kat
40118962ad #929: Load symbols from debuginfod cache (#962) 2024-08-07 07:09:17 +01:00
Andrei Pangin
321a712ff8 Added Native API test 2024-07-26 14:41:40 +03:00
Andrei Pangin
78123a85a7 Fixed parsing non-PIC executables 2024-07-26 14:37:09 +03:00
Andrei Pangin
7f712bb4e9 Fixed recursion in pthread_create when using native profiling API 2024-07-26 14:35:38 +03:00
Andrei Pangin
09f0a8fef7 #959: JFR converter fails to filter thread states in jfrsync profiles 2024-07-18 17:49:11 +01:00
Andrei Pangin
6207d5dc86 Fix unsafe access to CodeCache 2024-07-16 21:59:18 +01:00
Andrei Pangin
c9cb0c1cb2 Minor cleanup 2024-07-16 21:58:47 +01:00
Long Yang
bd095f13ec #955: Add --libpath option to specify the path of libasyncProfiler.so in the container 2024-07-15 15:22:21 +01:00
Andrei Pangin
174e295ffb Test framework cleanup and fixes 2024-07-14 23:44:46 +01:00
Soumadipta Roy
76c2024e83 Test framework and a set of new tests (#812, #951) 2024-07-09 15:59:26 +01:00
Andrei Pangin
e38cd32e70 #952: Docker image for building async-profiler release packages for x64 and arm64 2024-07-07 16:16:11 +01:00
Andrei Pangin
c25345454a #952: Solve musl and glibc compatibility issues 2024-07-07 16:08:52 +01:00
Andrei Pangin
4e9f0961c0 Simplify Makefile 2024-06-27 03:03:20 +01:00
Andrei Pangin
0d90f1c817 Do not package redundant .md files 2024-06-25 13:52:48 +01:00
Andrei Pangin
df4af83dc2 Added issue and pull request templates 2024-06-25 13:42:27 +01:00
Andrei Pangin
f4528dec7c Added CoC and Contributing Guidelines 2024-06-25 09:42:42 +01:00
Andrei Pangin
30f4f321b8 Cleanup dead code 2024-06-25 09:03:40 +01:00
Andrei Pangin
28fbb132c5 Handle truncated JFRs 2024-06-22 00:56:09 +01:00
Andrei Pangin
9660e15b1e Fix jfrconv build rules for macOS 2024-06-19 01:41:31 +01:00
Andrei Pangin
33a65c8dac #895: Rewrote jfrconv to make it a statically linked executable 2024-06-16 22:16:50 +01:00
Andrei Pangin
dbcaa4d81a JfrReader should jump over custom events 2024-06-11 23:08:50 +01:00
Andrei Pangin
475343646c JfrReader should read strings in a constant pool 2024-06-11 15:41:39 +01:00
Andrei Pangin
2f30dc3f9b #940: lseek64 is not declared when compiling on Alpine/aarch64 2024-06-01 23:05:45 +01:00
Andrei Pangin
34a9e1354a #923: cstack=vm support for JDK 23+ 2024-05-24 01:57:00 +01:00
Andrei Pangin
0c4cd5cb14 #934: Fix crash on Zing in a native thread 2024-05-23 02:29:52 +01:00
Andrei Pangin
048cafa4ef #923: Adjust VMStructs for JDK 23 2024-05-13 01:32:28 +01:00
Andrei Pangin
ed7d848061 #928: Do not record live object samples between JFR chunks 2024-05-12 00:59:05 +01:00
Andrei Pangin
b96d09a883 Minor refactoring 2024-05-12 00:45:20 +01:00
Andrei Pangin
9733a08e93 An option to accumulate JFR events in memory instead of flushing to a file (#925) 2024-05-07 23:58:55 +01:00
Andrei Pangin
07e6015d1d #921: Fix compilation failure on ARM32 2024-05-03 03:08:46 +03:00
Andrei Pangin
26a94a1839 Build jfrconv.exe for Windows 2024-05-02 02:49:21 +03:00
Andrei Pangin
2de2a187cc jfrconv: autodetect pb.gz output format 2024-05-02 02:47:57 +03:00
Andrei Pangin
ac514e57e9 #914: Do not set DebugNonSafepoints, if it is already set in the Command Line 2024-04-19 02:11:18 +01:00
Andrei Pangin
059cf1367f #917: Allow -e alloc,cpu,lock syntax 2024-04-18 23:37:41 +01:00
Andrei Pangin
005f7a3dc3 Workaround dependency on GLIBCXX_3.4.20 2024-04-10 16:30:55 +01:00
Andrei Pangin
978fe2ccde Pack stack traces in pprof format 2024-03-26 03:55:27 +00:00
Vsevolod Tolstopyatov
5da0a5f3f3 FlameGraph: search with Command+F key (⌘F) on mac (#906) 2024-03-22 22:52:34 +00:00
Andrei Pangin
241f5e3965 Build failure due to missing -ldl 2024-03-19 22:01:29 +00:00
Andrei Pangin
a75ebc7786 #905, #895: Converter enhancements 2024-03-16 02:50:01 +00:00
Andrei Pangin
397f450809 #759: Discover available profiling signal automatically 2024-03-16 00:02:41 +00:00
Andrei Pangin
1be8838964 Do not parse the same executable twice 2024-03-15 03:07:30 +00:00
Andrei Pangin
cbecc6f8b4 #893: Fix [no_Java_frame] on ARM64 2024-03-10 22:07:04 +00:00
Andrei Pangin
ddc280dfc4 #896: Flame Graph: alt+click to remove stacks 2024-02-28 19:30:56 +00:00
Andrei Pangin
7be97a0aa3 Fix DefineClass crash on OpenJ9 2024-02-23 02:43:47 +00:00
Andrei Pangin
9fe16b6eb5 #892: Resolve tracepoint id in asprof 2024-02-21 00:04:25 +00:00
Andrei Pangin
6248a879f9 Updated comment on -j option 2024-02-17 00:38:11 +00:00
Charm
3b2d969b49 Fix memory leak of keys from jvmti->GetSystemProperties (#891) 2024-02-14 12:55:25 +00:00
Chuan-kai Lin
ad05845b98 Fix Proto encoding of negative integers (#889) 2024-02-07 20:37:03 +00:00
Andrei Pangin
e751ea1b0d #886: Support cstack=vm for slowdebug JVM 2024-02-06 02:37:28 +00:00
Andrei Pangin
deec21814b #884: Record event timestamps early 2024-02-06 00:52:20 +00:00
Andrei Pangin
f657048f16 #885: Print error message if JVM fails to load libasyncProfiler.so 2024-02-05 01:35:34 +00:00
Andrei Pangin
03004b6505 #769: Workaround for JDK-8312065 on JDK 8 2024-01-28 18:06:55 +00:00
Andrei Pangin
7643623628 #872: Get rid of libstdc++ streams 2024-01-27 23:51:15 +00:00
Andrei Pangin
a17529378b #881: --to/from arguments did not work with jfrsync 2024-01-24 00:34:58 +00:00
Andrei Pangin
985738ec37 Replace source/target with release in pom.xml 2024-01-22 22:49:12 +00:00
Andrei Pangin
4e441b4024 Release 3.0 2024-01-20 23:17:15 +00:00
Andrei Pangin
76012dc568 Fix INCBIN macro usage 2024-01-20 22:59:13 +00:00
Andrei Pangin
a76d06f1f2 Update CHANGELOG and README 2024-01-20 18:53:42 +00:00
Andrei Pangin
81ad77eadd Delete travis config 2024-01-20 15:47:52 +00:00
Andrei Pangin
9fad48890c Use shorter variant of copyright headers 2024-01-20 15:34:53 +00:00
Andrei Pangin
d23121855a Update sample flamegraph 2024-01-20 14:59:38 +00:00
Andrei Pangin
fdb4bb4819 Update links to the github repo 2024-01-20 13:51:32 +00:00
Andrei Pangin
8fd1db0d8d Sync jattach sources with jattach repo 2024-01-20 13:39:19 +00:00
Tolya Korniltsev
77140be5fa jfr2pprof: Fix multichunk jfr symbols and location's line numbers (#879) 2024-01-17 02:24:00 +00:00
Andrei Pangin
7b50b1d834 #819: Extend AArch64 stack walking fix to cstack=dwarf 2024-01-16 23:49:01 +00:00
Andrei Pangin
b653d28eae #819: Set default DWARF unwind table for vDSO 2024-01-15 20:36:01 +00:00
Andrei Pangin
9e4f2a6af6 #819: Fix AArch64 stack unwinding with cstack=vm 2024-01-15 15:44:02 +00:00
Long Yang
8910a7a462 #873: the accuracy issue of ObjectSampler in --total mode (#876) 2024-01-05 18:44:29 +00:00
Andrei Pangin
915b09067c #726: Allow LD_PRELOAD of JVM TI agent 2024-01-03 02:31:56 +00:00
Andrei Pangin
1a5d74e23c #841: Fix LD_PRELOAD support 2024-01-01 20:52:29 +03:00
Andrei Pangin
809a19ce8f asprof help misses cstack=vm 2023-12-19 23:45:27 +00:00
Andrei Pangin
fcdb4aeec7 Automatically shutdown profiler upon reaching FD limit 2023-12-18 00:18:20 +00:00
Andrei Pangin
9e874cb94b #864: Reduce Flame Graph size 2023-12-16 03:09:54 +00:00
Andrei Pangin
f099abe619 Normalize Lambda names from original JFR recordings 2023-12-15 04:41:17 +00:00
Andrei Pangin
d3dde7e5e7 Fix stack unwinding from nmethod_entry_barriers on JDK 21 2023-12-15 03:53:36 +00:00
Andrei Pangin
9b5dc907a5 Fix detection of JIT compilation level on JDK 21 2023-12-15 01:10:45 +00:00
Andrei Pangin
5da01c0a44 #864: Deduplicate strings in a Flame Graph 2023-12-12 01:47:41 +00:00
Andrei Pangin
b8a60e66de #863: asprof to run jattach commands 2023-12-10 03:32:25 +00:00
Andrei Pangin
02c5934300 Fix ActiveSetting value for cstack=vm in JFR recording 2023-12-05 12:57:07 +00:00
Suresh
29b78f4670 Make converter Arguments class public (#859) 2023-12-04 21:24:31 +00:00
Andrei Pangin
ff0d86b8e7 #831: Workaround for JDK-8313816 2023-12-04 13:28:05 +00:00
Andrei Pangin
aa0305965f Updated supported platforms and change log 2023-12-04 00:10:11 +00:00
Andrei Pangin
4397b70894 #857: Support JFR recordings from JDK 22 2023-12-03 17:45:40 +00:00
Andrei Pangin
8e0b0953f6 #832: Normalize names of hidden classes / lambdas 2023-12-03 02:18:48 +00:00
Andrei Pangin
db9c4c4c56 #783: Shutdown asprof gracefully on SIGTERM 2023-12-03 01:52:38 +00:00
Andrei Pangin
3d28531e26 #832: Normalize names of hidden classes / lambdas 2023-12-03 00:58:34 +00:00
Andrei Pangin
bfdaa44260 #849: Parse concatenated multi-chunk JFRs 2023-12-03 00:39:40 +00:00
Andrei Pangin
ab6a7c9bd5 #853: Workaround for JDK-8321116 2023-12-01 21:07:47 +00:00
Leslie Zhai
104b7bda5a Initial loongarch port (#770) 2023-12-01 09:42:43 +00:00
Andrei Pangin
8d77e0909c #855: ctimer mode for accurate profiling without perf_events 2023-12-01 01:44:53 +00:00
Andrei Pangin
a1354b6a75 Restructure perfEvents sources; fix off-by-one bug 2023-11-28 01:49:54 +00:00
Andrei Pangin
0d0f0f0c67 #644: Fixed RISC-V build and tests 2023-11-26 03:25:04 +00:00
Aleksey Shipilëv
752b79ec4e Basic RISC-V support (#644) 2023-11-26 03:22:01 +00:00
Thomas Matthijs
48a97b64e7 #827: Handle both -jar and -cp when running converter.jar 2023-11-24 15:28:34 +00:00
Andrei Pangin
4e13b8138b #803: --loop cannot be combined with --fdtransfer 2023-11-24 01:27:44 +00:00
Andrei Pangin
b1b5b9898f #840, #637: loop and timeout options should work regardless of how the profiler was started 2023-11-24 00:43:33 +00:00
Andrei Pangin
6ab9f83aec #834: Protect VMStructs::initLogging from OutOfMemoryError 2023-11-03 02:16:26 +00:00
Andrei Pangin
49d08fd068 Fixed macOS build 2023-11-03 01:57:04 +00:00
Andrei Pangin
a237cad115 #833: Time-to-safepoint JFR event 2023-11-02 01:20:00 +00:00
Andrei Pangin
50e63b7ad6 Demangle Rust symbols correctly and shorten C++ signatures by default 2023-11-01 00:32:56 +00:00
Andrei Pangin
c2fed3b63a Improve DWARF parser on AArch64 2023-10-31 01:43:01 +00:00
Andrei Pangin
d29cb86bd0 Fixed build with MERGE=false 2023-09-20 23:32:30 +01:00
Andrei Pangin
326d797d5a Restart interrupted poll/epoll_wait manually 2023-09-20 23:21:08 +01:00
Andrei Pangin
cb95d37b84 Hardware breakpoints stuck in infinite loop on ARM64 2023-09-05 14:27:13 +01:00
Andrei Pangin
e799c2a665 fastThreadId fix 2023-09-05 14:26:23 +01:00
Andrei Pangin
56a50a8002 Handle AArch64-specific DWARF opcode 2023-09-05 01:44:49 +01:00
Andrei Pangin
f483c166ba Fixed AArch64 arraycopy stack walking on JDK 8 2023-09-04 17:47:02 +01:00
Andrei Pangin
3c26f5138a Resolved cyclic dependency in VMStructs initialization 2023-09-04 17:23:59 +01:00
Andrei Pangin
02e92eb45c Allow profiling startup allocations on JDK 11+ 2023-09-01 18:03:19 +01:00
Andrei Pangin
902d17d0b1 Workaround for JDK-8313796 2023-09-01 13:03:29 +01:00
Olga Stogova
a97cf59f5b Refresh design of tree view (#808) 2023-08-31 11:30:41 +01:00
Andrei Pangin
5fc3c975fe Unwind copy_longs stubs on AArch64 2023-08-31 03:07:32 +01:00
Andrei Pangin
e79c44eb38 Fixed attach to OpenJ9 on macOS 2023-08-30 02:45:45 +01:00
Andrei Pangin
77a308a7b9 Fixed DWARF stack walking on macOS/ARM64 2023-08-30 02:14:32 +01:00
Andrei Pangin
d1de831e6d #805: Lock profiling optimizations 2023-08-28 01:13:49 +01:00
Andrei Pangin
847504dfd4 Change profiler.sh to asprof in the documentation 2023-08-27 22:13:38 +01:00
Andrei Pangin
dcc3ffd083 NPE when closing JfrReader 2023-08-24 12:17:00 +01:00
Andrei Pangin
0a5ac270a0 #450: Close log file deleted by the launcher 2023-08-23 00:21:04 +01:00
Andrei Pangin
62b2387a80 #791, #800: Fix path translation for containers 2023-08-20 16:24:00 +01:00
Andrei Pangin
409a3e3b19 Build for Java 8 by default, but release for Java 7 2023-08-20 02:03:15 +01:00
Andrei Pangin
26954081d3 Workaround for JDK-8237858 on AArch64 2023-08-18 17:47:42 +01:00
Andrei Pangin
98ad173d24 Bump up major version 2023-08-17 16:41:51 +01:00
Andrei Pangin
6e014258e3 #795: Experimental option to obtain stack traces without AsyncGetCallTrace 2023-08-16 16:50:45 +01:00
Andrei Pangin
625dcd2615 Compiler warnings: sprintf -> snprintf 2023-08-11 14:00:07 +01:00
Andrei Pangin
a852bdd22e #747: Workaround for JDK-8307549 2023-08-02 16:38:30 +01:00
Andrei Pangin
eb2879939b Fixed clang compilation failure 2023-07-31 00:28:40 +01:00
Andrei Pangin
db457f3f8b #777: Show JIT compilation task (#785) 2023-07-30 20:27:37 +01:00
Andrei Pangin
25587664ab Fix for displaying vtable targets on JDK 21 2023-07-26 00:52:33 +01:00
Andrei Pangin
14c495dc53 #776: Annotate JFR unit types with @ContentType 2023-07-22 22:17:23 +01:00
Andrei Pangin
27e2d4cb4b Run tests with loglevel=error 2023-07-21 19:02:20 +01:00
Andrei Pangin
3c33c755be Fix for #761: relocate addresses in .dynamic section properly 2023-07-16 15:24:39 +01:00
Andrei Pangin
7a52d79b16 Support UseCompressedObjectHeaders aka Lilliput 2023-07-11 16:05:31 +01:00
Andrei Pangin
117594bb4d #759: Configure alternative profiling signal 2023-07-04 14:19:08 +01:00
Andrei Pangin
89bf3eabc0 #761: Parse dynamic linking structures 2023-07-03 17:28:04 +01:00
Andrei Pangin
f9138bd7f6 Avoid chain reaction when profiling context-switches 2023-06-16 15:06:37 +01:00
Andrei Pangin
557a15f47f Return error if profiling mode is not supported with non-Java processes 2023-06-12 16:32:48 +01:00
Andrei Pangin
d46d6c67c0 #751: Profile non-Java processes 2023-06-12 15:07:48 +01:00
Andrei Pangin
03dc1b9c98 jfr2flame converter adds redundant 'tid' to thread names 2023-06-02 02:45:45 +01:00
Andrei Pangin
5d24f85d1b Compatibility with instrumenting agents 2023-06-02 02:37:05 +01:00
Andrei Pangin
cb1c00013f Improve reader support for custom JFR events. Add CPULoad and ObjectCount events. 2023-05-21 01:05:32 +01:00
Andrei Pangin
e53eab62e4 Add support for custom events in JfrReader 2023-05-19 02:22:39 +01:00
Andrei Pangin
d8f1b9c15a #750: --jfrsync may specify a list of JFR events 2023-05-19 00:37:32 +01:00
Andrei Pangin
567b97d223 Fixed crash with combined cpu+wall profiling 2023-05-17 16:14:53 +01:00
Kirill Timofeev
c7dd73ca6e "check" action prints result to a file (#684) 2023-05-13 01:38:24 +01:00
Alkis Evlogimenos
bbc1b3270f Compile with -std=c++2a without errors (#721) 2023-05-13 01:34:41 +01:00
Andrei Pangin
3bdf9db876 #689: Calculate ELF base address correctly 2023-05-13 01:24:58 +01:00
Andrei Pangin
c6a43ae2ee Compilation fix 2023-05-04 01:13:03 +01:00
Andrei Pangin
6c9b71594c Support GCHeapSummary event in JfrReader 2023-05-04 01:09:28 +01:00
Andrei Pangin
1cb2c05161 Fixed flaky test 2023-05-04 00:24:07 +01:00
Andrei Pangin
6dfd2159fe Fixed allocation profiling on JDK 20.0.1 2023-05-03 23:56:38 +01:00
Andrei Pangin
bdceedda58 Draft Release 2.10 2023-04-25 08:46:30 +02:00
Johannes Bechberger
f57d3dcfda Fix build when the main folder contains flame.html or tree.html files (#743) 2023-04-20 12:51:14 +01:00
Andrei Pangin
39b0bdb5e4 Record GCHeapSummary events in JFR 2023-04-03 02:54:03 +01:00
Andrei Pangin
ac2eabfe96 #740: Profile CPU + Wall clock together 2023-04-02 01:25:10 +01:00
Andrei Pangin
f64eed870a #739: JFR overflow fix 2023-04-01 21:36:34 +01:00
Andrei Pangin
d51f445243 #708: Fix deadlock between Profiler::stop and timerLoop 2023-03-31 00:50:22 +01:00
Andrei Pangin
578e3a1162 #734: Raw PMU event descriptors 2023-03-28 04:31:42 +01:00
Andrei Pangin
6b35b46070 #736: Show targets of vtable/itable calls 2023-03-28 03:59:33 +01:00
Andrei Pangin
ac561f3ba7 #733: Make the same binary work with glibc and musl 2023-03-24 03:51:28 +00:00
Andrei Pangin
7f4b6536c7 #724: No more profiler.sh 2023-03-24 01:48:53 +00:00
Andrei Pangin
e3b7bfca22 Fixed link to build status 2023-03-14 03:32:31 +00:00
Andrei Pangin
c8de91df6b #723: --clock option to select JFR timestamp source: TSC or CLOCK_MONOTONIC 2023-03-14 03:31:06 +00:00
Andrei Pangin
91691ce039 #702: Make Flame Graph status line and search results always visible 2023-03-12 20:27:05 +00:00
Andrei Pangin
4a7ce8ce79 #724: Binary launcher 2023-03-12 04:17:32 +00:00
Andrei Pangin
a8f20ebc79 #719: Automatically classify execution samples into categories 2023-02-26 18:28:25 +00:00
Andrei Pangin
62c1a799ef An option to read JFR file by chunks 2023-02-23 16:40:31 +00:00
Andrei Pangin
d350738229 #714: Prefer ObjectSampler for allocation profiling when JVM has no debug symbols 2023-02-23 00:34:07 +00:00
Andrei Pangin
ada44ee12e #712: Update README regarding chunksize and chunktime 2023-02-23 00:16:52 +00:00
Andrei Pangin
9fc0495f86 Walk frames with missing DWARF info correctly 2023-02-21 02:02:17 +00:00
Andrei Pangin
db1ca37a2e Improve demangling of C++ symbols 2023-02-21 01:58:05 +00:00
Andrei Pangin
4ffdb05d3a Allow JfrReader to parse in-memory buffer 2023-02-17 15:59:25 +00:00
Andrei Pangin
cdb8704156 Demangle Rust symbols 2023-02-14 02:15:48 +00:00
Andrei Pangin
c722b3972a Avoid crash in fillFrameTypes() 2023-02-14 01:54:42 +00:00
Andrei Pangin
48703edee7 Do not dump redundant threads in a JFR chunk 2023-01-09 23:09:43 +00:00
Andrei Pangin
63799a6055 Better fix for mapped pseudofiles 2022-12-22 00:11:38 +00:00
Andrei Pangin
ebda293a42 Do not try to parse mapped pseudofiles 2022-12-20 22:57:35 +00:00
Andrei Pangin
69c0340a08 [README] Clarification on allocation profiling and debug symbols 2022-12-13 02:25:47 +00:00
Andrei Pangin
45074592cf Simplified pom.xml 2022-11-27 02:19:12 +00:00
Andrei Pangin
32601bccd9 Release 2.9 2022-11-26 22:38:37 +00:00
Andrei Pangin
8c4824be7f Reference to ap-loader 2022-11-26 22:30:35 +00:00
Andrei Pangin
b08bf2d574 #93: Profiler API with embedded agent as a Maven artifact 2022-11-26 22:20:09 +00:00
Andrei Pangin
b4b2218782 Java API: extract embedded libasyncProfiler.so 2022-11-21 00:16:12 +04:00
Andrei Pangin
e1f149f3ae #669: Avoid JVM crash when reading Program Headers 2022-11-16 02:47:59 +04:00
Andrei Pangin
80808a3f1b 2.9 Release Candidate 2022-11-14 04:30:25 +04:00
Andrei Pangin
7eaefdb18f LiveObject JFR event 2022-11-14 03:57:20 +04:00
Andrei Pangin
5e4a402c7e #673: Try adjusting SP when AGCT fails 2022-11-13 00:09:36 +04:00
Andrei Pangin
b0a44524ba #392, #675: Do not truncate signatures in collapsed format 2022-11-12 23:00:55 +04:00
Yonatan Goldschmidt
ed092da71b Update README with C1/interpreted methods suffix info (#671) 2022-11-02 10:11:19 +04:00
Andrei Pangin
58c62fe4e8 Support leak profiler on OpenJ9 2022-10-15 01:45:17 +04:00
Andrei Pangin
bdaefa9a3b #658: Restart send/recv calls in case of EINTR 2022-10-13 23:35:13 +04:00
Andrei Pangin
8168c7dc91 #657: jfrsync sometimes fails when filename contains %t 2022-10-13 01:30:01 +04:00
Andrei Pangin
98a2006386 Fixed broken link 2022-10-12 01:10:21 +04:00
Johannes Bechberger
389d6c5daa Allow to set Bytecode version for Java compile in makefile (#655) 2022-10-12 00:25:59 +04:00
Andrei Pangin
03e6fc5a17 #633: Java Heap leak profiler 2022-10-03 04:14:17 +04:00
Andrei Pangin
f1e2b96a2f Reset 'alloc' and 'lock' arguments 2022-10-02 00:47:19 +04:00
Andrei Pangin
b5634b9d88 #647: Patch rpath for Homebrew JDK 2022-09-17 02:07:17 +03:00
Andrei Pangin
32b5fd8e3c vaddr in ELF Program Header might not match the actual load address 2022-09-16 17:12:55 +03:00
Andrei Pangin
b7dfd74a63 meminfo fixes 2022-09-16 15:57:22 +03:00
Andrei Pangin
ed30401cc2 #643: `--include`/`--exclude` options in the FlameGraph converter 2022-09-13 19:03:06 +03:00
Andrei Pangin
d4bee9647f Print used memory stats 2022-09-13 02:29:14 +03:00
Andrei Pangin
d93477f680 Handle fcntl() and ioctl() failures 2022-09-12 01:02:32 +03:00
Andrei Pangin
696087c2ab Fixed VM.log command for JDK 18 2022-09-10 00:26:02 +03:00
Andrei Pangin
1e8301e831 Fix GCC 11 warnings 2022-09-09 23:40:10 +03:00
Andrei Pangin
64d9f98a0f Sync with jattach repo 2022-09-08 09:05:53 +03:00
Andrei Pangin
2613894b85 #645: Fixed attaching to a container on Linux 3.x 2022-09-08 08:55:54 +03:00
Andrei Pangin
b8c8db45d7 Do not use ELF NODELETE flag because of glibc bug 2022-08-29 23:18:47 +03:00
Yonatan Goldschmidt
14f58ed2c7 Generate randomized fdtransfer path per async-profiler invocation (#635) 2022-08-24 11:56:46 +03:00
Andrei Pangin
7fa11e768b #632: Fixed allocation profiling on Zing 2022-08-17 01:14:32 +03:00
Andrei Pangin
74ecedc671 Broken 'jfr print' in jfrsync mode 2022-08-16 17:49:07 +03:00
Andrei Pangin
63f2539e5e #636: Cannot parse JFR files produced by JDK 20 EA 2022-08-16 17:12:02 +03:00
Andrei Pangin
31261ea7be #626: --simple and --dot options in jfr2flame converter 2022-08-02 03:17:43 +03:00
Andrei Pangin
9cec0765cd Safe parsing of kallsyms 2022-07-25 21:12:56 +03:00
Andrei Pangin
55da899511 #622: Display inlined frames under a runtime stub 2022-07-24 02:35:07 +03:00
Andrei Pangin
56ae519224 Make CodeCache functions public 2022-07-17 00:38:14 +03:00
Andrei Pangin
733cf7c668 Release 2.8.3 2022-07-16 22:57:55 +03:00
Andrei Pangin
ee3ef243d3 Do not call System.loadLibrary, if libasyncProfiler is preloaded with -agentpath 2022-07-16 20:43:35 +03:00
Andrei Pangin
28357c2fb4 #618: Support virtualized ARM64 Mac 2022-07-16 18:34:07 +03:00
Andrei Pangin
2459d7eac4 #617: An option to generate certain JFR events by async-profiler in jfrsync mode 2022-07-16 03:10:03 +03:00
Andrei Pangin
cc1682d20a Handle different versions of Zing properly 2022-07-15 01:21:11 +03:00
Andrei Pangin
9ec48d9666 Could not recreate perf_event after the first failure 2022-07-14 18:04:13 +03:00
Andrei Pangin
e2abcd2238 Release 2.8.2 2022-07-14 01:59:33 +03:00
Andrei Pangin
9ae31b0e91 #611: Double click selects the name of the current frame 2022-07-10 04:07:27 +03:00
Andrei Pangin
a836ad6f89 #612, #616: Make possible to attach to Alpine container from a glibc host 2022-07-10 00:24:41 +03:00
Andrei Pangin
c6f11d2673 #615: Fixed JDK 7 crash in ConstMethod::id() 2022-07-06 20:00:18 +03:00
Andrei Pangin
989c3747e2 Fix CPU profiling on Zing JVM 2022-07-04 03:32:10 +03:00
Andrei Pangin
ce9ebc2b63 #610: Could not set dlopen hook on Arch Linux 2022-07-04 01:24:09 +03:00
Andrei Pangin
52cdc138d0 Properly cut C++ function arguments in JFR output 2022-07-03 23:26:11 +03:00
Andrei Pangin
d09be06029 Mark interpreted frames with _[0] in collapsed output 2022-07-03 21:53:22 +03:00
Andrei Pangin
859c36ef9c Fix FD leak when using fdtransfer client 2022-07-03 21:28:50 +03:00
Andrei Pangin
cd084b5a97 Disable Engine events when stopping profiler 2022-07-03 21:09:19 +03:00
Andrei Pangin
1505345ee9 Scan only alive nmethods 2022-07-03 20:26:13 +03:00
Andrei Pangin
8547d642ab Add timeout for the fdtransferClient connection 2022-06-21 15:17:47 +03:00
Andrei Pangin
fa989850b6 Release 2.8.1 2022-06-11 00:52:14 +03:00
Andrei Pangin
1e4738ba7b #607: INCBIN symbols resolved incorrectly in debug builds 2022-06-09 01:58:11 +03:00
Andrei Pangin
80a3c66969 Release 2.8.1-ea 2022-06-08 16:12:56 +03:00
Andrei Pangin
e1d5df7ffd #601: '--lib' option to customize profiler .so path in the container 2022-06-08 04:26:12 +03:00
Andrei Pangin
6ca5ad2c93 #526, #530: JFR converter improvements: time range, collapsed output, pattern highlighting 2022-06-08 03:58:35 +03:00
Andrei Pangin
7af24609eb #549: '%n' pattern in the filenames 2022-06-05 04:25:24 +03:00
Andrei Pangin
80302eb43d #555: Workaround JFR bug in AttachCurrentThread 2022-06-05 02:11:06 +03:00
Andrei Pangin
0396bc7d1b Allow 'profiler.sh list' command without PID 2022-06-04 15:14:38 +03:00
Andrei Pangin
b29bfd3f3a 'mcache' option to retain jmethodID name cache between runs 2022-06-02 04:11:38 +03:00
Andrei Pangin
93b8171601 Remove always-false condition 2022-06-01 03:11:47 +03:00
Nikita Nazarov
721225393e Remove library name fetching for unknown and error frames (#599) 2022-06-01 01:57:02 +03:00
Andrey Pangin
58a3cb0d25 Minor cleanup/formatting 2022-05-24 14:52:10 +03:00
Artyom Drozdov
6754312e83 Resources deduplication between cpp and java (#591) 2022-05-24 13:39:23 +03:00
Andrei Pangin
daf844397e Warn about I/O issues when dumping profile 2022-05-22 21:12:46 +03:00
Andrei Pangin
5e0021a99f Do not reuse safemode=64 which meant different thing 2022-05-22 14:30:43 +03:00
Andrei Pangin
37c56c44bb Workaround for JDK-8185348 2022-05-22 03:25:31 +03:00
Andrei Pangin
88dd2345ea Avoid race of writing to the same output file from two threads 2022-05-21 20:23:12 +03:00
Andrei Pangin
ded970fd50 Handle quirks of Alpine/musl dynamic linker 2022-05-19 03:47:50 +03:00
Andrei Pangin
796aff5555 Check zombie methods when marking frame types 2022-05-17 18:32:49 +03:00
Artyom Drozdov
d36e8e91f9 Bump java plugin source/target version (#590) 2022-05-17 15:56:13 +03:00
Andrei Pangin
323c02e0d7 Minor formatting 2022-05-16 02:16:42 +03:00
Tim Steinbach
53cfd2b8b1 Add pprof converter (#587) 2022-05-16 00:56:09 +03:00
Andrei Pangin
4431121760 #585: Fixed crash with repeated JFR dumps & lock profiling 2022-05-14 03:20:25 +03:00
Andrei Pangin
4a88ee445f Release 2.8 2022-05-10 04:17:09 +03:00
Andrei Pangin
ce0fc0a2d9 Fixed musl 2022-05-10 04:04:05 +03:00
Andrei Pangin
768b437593 Missing headers 2022-05-09 22:56:41 +03:00
Andrei Pangin
20fa8564c4 Do not merge source files when building fat binary on macOS 2022-05-09 20:21:42 +03:00
Andrei Pangin
aaf24effe8 ARM32 and x86 pop frame support 2022-05-09 20:00:44 +03:00
Andrei Pangin
3bbeed6267 ARM64 pop frame support 2022-05-09 14:57:51 +03:00
Andrei Pangin
cb51be8eb3 Merge different frame types of the same method on the Flame Graph 2022-05-09 02:44:21 +03:00
Andrei Pangin
7749b72e73 Tree view colors 2022-05-08 18:27:42 +03:00
Andrei Pangin
7112bc14e5 Stack recovery fixes 2022-05-08 17:45:38 +03:00
Andrei Pangin
56ca88677b Release 2.8-ea 2022-04-26 08:55:33 +03:00
Andrei Pangin
8a258d31a5 Show C1 compiled frames. Merge different compilation types on a flame graph. 2022-04-26 08:43:56 +03:00
Andrei Pangin
e33a01e0c6 Re-implemented stack recovery. Unwind native stacks manually before AGCT. 2022-04-26 08:37:22 +03:00
Andrei Pangin
5d5138ea61 Allow profiler server only at JVM startup 2022-04-24 16:15:31 +03:00
Andrei Pangin
10b5ad0ee5 #512: Simple HTTP server for managing async-profiler 2022-04-24 03:10:49 +03:00
Andrei Pangin
b5258cca2c JVM TI based allocation profiling fine tuning 2022-04-14 03:02:26 +03:00
Andrei Pangin
0718a09e0e #169: JVM TI based allocation profiling for JDK >= 11 2022-04-14 01:46:48 +03:00
Andrei Pangin
fe173c4101 #561, #579: Fixed concurrency issues when collecting samples / restarting profiler 2022-04-11 01:49:34 +03:00
Andrei Pangin
8032daa49d --cpu converter option to extract CPU profile from the wall-clock output 2022-03-31 03:05:22 +03:00
Andrei Pangin
fa8b8f8072 #569: Distinguish runnable/sleeping threads in OpenJ9 wall-clock profiler 2022-03-31 01:34:48 +03:00
Andrei Pangin
85f3a68c56 #570: Display an error when using 'jfrsync' on JDK without Flight Recorder 2022-03-30 23:47:26 +03:00
Andrei Pangin
60ce15569a #572: Careful parsing of ELF Program Headers 2022-03-29 04:24:06 +03:00
Andrei Pangin
9838ddb693 #572: CodeCache refactoring + parseProgramHeaders() fixes 2022-03-28 04:44:15 +03:00
Andrei Pangin
2f341043ef #571, #572: Locate DWARF info and GOT/PLT from ELF Program Headers 2022-03-27 22:09:08 +03:00
Andrei Pangin
ee55fbe17b #557: Do not fail on unknown argument 2022-03-10 01:01:29 +03:00
Andrei Pangin
d7a2a4fc8b Mark top methods as interpreted/compiled/inlined (#553) 2022-02-21 04:58:19 +03:00
Andrei Pangin
e9b7747015 Do not mmap perf page in --all-user mode. cstack=fp forces manual stack walking. 2022-02-19 23:05:55 +03:00
Andrei Pangin
b96e07b001 List loglevel argument 2022-02-16 03:17:34 +03:00
Ludovic Henry
ba00ca26c1 Add loglevel argument (#551) 2022-02-16 02:00:22 +03:00
Andrei Pangin
9ed175d73e Updated links to 2.7 binaries 2022-02-14 02:29:33 +03:00
Andrei Pangin
b287816559 Release 2.7 2022-02-14 00:57:18 +03:00
Andrei Pangin
9a979a712d OpenJ9-related fixes 2022-02-13 23:48:45 +03:00
Andrei Pangin
42442ed593 Fixed AllocTracer on ARM32 2022-02-13 21:59:59 +03:00
Andrei Pangin
432d622aa4 Follow-up to #537: added a few comments 2022-02-13 20:36:46 +03:00
Gunter Haug
f477f8d4c0 Look in plt sections for the global offset table too (#537)
Co-authored-by: Johannes Bechberger
2022-02-13 20:34:44 +03:00
Andrei Pangin
456ff57115 OpenJ9 fixes 2022-02-09 04:26:40 +03:00
Andrei Pangin
5ec28a86b2 #548: syntax error in profiler.sh 2022-02-08 20:49:47 +03:00
Andrei Pangin
e7cd6ee6fb OpenJ9 support 2022-02-01 16:04:48 +03:00
Andrei Pangin
8ba8fc748c Minor DWARF fix 2022-01-28 03:42:19 +03:00
Andrei Pangin
5f37bf3ad6 DWARF unwinding used incorrect symbol base 2022-01-26 18:11:41 +03:00
Andrei Pangin
26e9c7aef2 DWARF stack walking fixes 2022-01-25 03:53:14 +03:00
Andrei Pangin
8efba10acc JfrReader backward compatibility 2022-01-25 01:56:33 +03:00
Andrei Pangin
e894420119 #531: Rewrite StackMapTable correctly 2022-01-23 01:23:25 +03:00
Andrei Pangin
2ddd4d230c #416: Improve reliability of stack recovery from [not_walkable_not_Java] 2022-01-21 08:55:09 +03:00
Andrei Pangin
1398e7ef75 #215: DWARF stack unwinding 2022-01-20 03:45:37 +03:00
Andrei Pangin
3456dd3d90 Patch Global Offset Table to hook libc functions 2022-01-19 02:48:36 +03:00
Andrei Pangin
c0d45fecec Updated links to version 2.6 2022-01-11 00:10:49 +03:00
Yonatan Goldschmidt
7e5f8a03f3 Fail arguments parsing upon unknown argument (#503) 2022-01-10 03:58:33 +03:00
Andrei Pangin
ce91abe6d9 Do not show 'GC_active' for compiler threads 2022-01-10 01:48:13 +03:00
Andrei Pangin
4ba7524d7c Release 2.6 2022-01-09 21:10:30 +03:00
Andrei Pangin
734ef03ebf Include shared library names in JFR output 2022-01-08 04:08:24 +03:00
Andrei Pangin
cf17e5efc3 #513: Protect native stack walking from crashes 2022-01-06 05:20:15 +03:00
Andrei Pangin
35b420a941 Compute FlameGraph depth when minwidth > 0 2022-01-06 01:35:27 +03:00
Andrei Pangin
ee4cd8e2b6 Revived workaround for slow JVM TI functions 2022-01-05 23:39:51 +03:00
Andrei Pangin
6f3134e99f Pass chunksize & jstackdepth options to the Flight Recorder in jfrsync mode 2022-01-05 22:21:44 +03:00
Andrei Pangin
c537b8298d Do not spoil VM.log decorators 2022-01-05 05:56:59 +03:00
Andrei Pangin
605550cf96 Off-by-one bug 2022-01-05 03:55:23 +03:00
Andrei Pangin
a57bbf3587 Optimize compilation with -fwhole-program 2022-01-04 02:17:30 +03:00
Andrei Pangin
9131344d61 Minor build system improvements 2022-01-01 21:54:39 +03:00
Andrei Pangin
2d0b9c9921 Avoid JVM crashes related to CompiledMethodLoad bugs and stack walking during GC 2022-01-01 21:34:01 +03:00
Andrei Pangin
30905fda4c Faster compilation, smaller binary 2021-12-26 04:35:04 +03:00
Andrei Pangin
5a11a71db9 Fixed races between flush/stop. Possibility to set timeout as hh:mm:ss 2021-12-19 21:47:11 +03:00
Andrei Pangin
d79a82935f Renamed 'duration' to 'timeout' 2021-12-15 03:41:39 +03:00
Andrei Pangin
995048c2fd #71: Continuous profiling 2021-12-15 03:28:24 +03:00
Andrei Pangin
7331e30ed5 JFR duration should be in nanos, not ticks 2021-12-12 03:06:20 +03:00
Andrei Pangin
1f5e4ca8aa Parse recording settings in JfrReader 2021-12-12 03:05:16 +03:00
Andrei Pangin
7ebed4e8e1 Fixed links to 2.5.1 binaries 2021-12-07 03:09:43 +03:00
Andrei Pangin
170451990b Release 2.5.1 2021-12-05 23:53:26 +03:00
Andrei Pangin
11a1d6d308 Read kernel symbols only for perf_events 2021-12-04 00:05:53 +03:00
Yonatan Goldschmidt
955413db8d Remove double close() call (#514) 2021-12-03 23:41:06 +03:00
Andrei Pangin
8a701b41e3 Fixed unsafe access to Recording fields 2021-12-03 04:09:14 +03:00
Andrei Pangin
dccd4c326a Mark native functions. Redo ZeroInterpreter support. 2021-11-29 04:17:36 +03:00
Andrei Pangin
8771888d28 Make sure async-profiler DSO cannot be unloaded 2021-11-28 22:07:00 +03:00
Andrei Pangin
7d25210d2c #506: Throw exception if output size exceeds string limit 2021-11-27 20:29:30 +03:00
Yonatan Goldschmidt
9087bc57d8 Mmap perf_events pages in fdtransfer (#475) 2021-11-15 23:47:03 +03:00
Andrei Pangin
3e1e1c614a Fixed RedefineClasses recursion when loading via JNI 2021-11-02 16:02:46 +03:00
Yonatan Goldschmidt
b80d163699 Escape backslashes in flamegraph frame names (#487) 2021-11-01 23:30:11 +03:00
Andrei Pangin
9705b66864 Minor rewording in README 2021-10-31 20:32:09 +03:00
Krzysztof Ślusarski
3c33c6aa47 Update README regarding the CodeCache flushing on Java method instrumentation (#483) 2021-10-31 20:15:29 +03:00
Andrei Pangin
4af6b65268 Merge event types (avoid duplicate categories) in jfrsync mode 2021-10-20 02:38:38 +03:00
Andrei Pangin
03c7b36bca Fixed deadlock between Profiler::stop() and flushJfr() 2021-10-20 01:36:17 +03:00
Aleksey Shipilëv
f8b15526b1 Handle OpenJDK C++ Interpreter (#474) 2021-10-11 22:38:27 +03:00
Andrey Pangin
e519fd84c4 Allow JMC to read incomplete JFR recordings 2021-10-01 22:46:12 +03:00
Andrey Pangin
79e1017088 Fixed link to macOS binaries 2021-10-01 05:28:21 +03:00
Andrey Pangin
edbb9e7c03 Release 2.5 2021-10-01 05:22:27 +03:00
Andrey Pangin
7eb15cfcf0 Canvas height should not be more than 32767px 2021-10-01 04:38:18 +03:00
Andrey Pangin
eafbbaea8b Avoid use of certain libstdc++ symbols 2021-10-01 02:42:33 +03:00
Andrey Pangin
d8228a1fec #462: An option to prepend library name to native symbols 2021-09-30 04:40:40 +03:00
Andrey Pangin
8a447481f8 #405: Resolve symlinks to profiler.sh 2021-09-30 02:54:18 +03:00
Andrey Pangin
791077354e Fixed wrong TSC frequency problem. Removed misleading CPUTimeStampCounter event. 2021-09-29 03:15:57 +03:00
Andrey Pangin
e071b9fa14 Use global CodeCache boundaries instead of _java_methods/_runtime_stubs where possible 2021-09-28 21:17:21 +03:00
Andrey Pangin
cc340923b9 #458: Wrong 'Matched %' when searching flame graphs with large values 2021-09-23 01:05:59 +03:00
Andrey Pangin
9c333219b5 Parse incomplete JFR files 2021-09-23 01:03:23 +03:00
Andrey Pangin
9acf8c1648 Ticks vs. nanos fixes 2021-09-23 00:24:34 +03:00
Andrey Pangin
5570afed9d Use RDTSC for JFR timestamps when possible 2021-09-21 05:33:22 +03:00
Andrey Pangin
975a506d83 Read JFR chunks lazily; support combined recordings 2021-09-20 01:42:42 +03:00
Andrey Pangin
09fb14bd87 Combine JFR recording with async-profiler samples in a single .jfr file 2021-09-19 22:45:01 +03:00
Andrey Pangin
3256d824de The workaround for #295 didn't work sometimes 2021-09-16 01:05:23 +03:00
Andrey Pangin
cc98710a6f [ppc64] Fixed native stack walking in wall-clock profiler 2021-09-12 02:44:58 +03:00
Gunter Haug
452f14c3d2 Port async-profiler 2.0 to Power Linux little endian (linuxppc64le) (#459) 2021-09-11 16:54:26 +03:00
Andrey Pangin
be8bba1900 Fixed application termination during JFR profiling 2021-09-10 01:05:11 +03:00
Andrey Pangin
65b5356ace Fixed sendfile() not working on 5.10+ kernels 2021-09-08 23:42:57 +03:00
Andrey Pangin
e91363c05a Fixed compiler warning 2021-09-08 21:19:58 +03:00
Andrey Pangin
3e47bf7551 #454: SCAN_STACK recovery results in impossible stack traces 2021-09-08 02:41:37 +03:00
Kirill Timofeev
9447068af3 DCEVM is also hotspot (#457) 2021-09-07 19:11:27 +03:00
Andrey Pangin
7e750825da Add lines/bci information in a Flame Graph 2021-09-06 03:57:48 +03:00
Andrey Pangin
eda5779552 Smallish naming and stylistic changes 2021-09-06 01:26:32 +03:00
Yonatan Goldschmidt
fc3b1ca84f Profile low-privileged processes with perf_events (#411) 2021-09-05 22:13:01 +03:00
Andrey Pangin
d13de48c0a #402: Changes in shared code required for ppc64le port 2021-09-05 21:15:46 +03:00
Andrey Pangin
552c699687 Fix package names for hidden/anonymous classes 2021-09-01 22:36:52 +03:00
Andrey Pangin
0fcc4d9bac #360: Chunked JFR. Support JFR files more than 2 GB. 2021-09-01 01:08:54 +03:00
Andrey Pangin
868bfec2a5 Dump results while profiling session is running. Switch JFR chunks. 2021-08-20 03:32:21 +03:00
Andrey Pangin
4a77d68bcb Sync with jattach 2.0: psutil interface, better container support, OpenJ9 VMs 2021-08-11 23:41:54 +03:00
Andrey Pangin
a38a375dc6 Compatibility with debug builds of JDK 8 2021-08-01 02:59:33 +03:00
Andrey Pangin
6bcd23fcf0 An option to group threads by scheduling policy 2021-07-29 01:06:04 +03:00
Andrey Pangin
8d2847a032 Updated the list of supported platforms 2021-07-28 01:20:12 +03:00
Andrey Pangin
e0998af713 Rework VM shutdown issue: avoid calling Profiler destructor 2021-07-19 02:42:31 +03:00
Andrey Pangin
01b3e6c517 Use standard JVM TI stack walker where possible. Record BCI in alloc/lock events 2021-07-11 20:13:27 +03:00
Andrey Pangin
def6eb4b1c Profile concurrent locks without JVM symbols 2021-07-11 16:57:04 +03:00
Andrey Pangin
4032c56caf Extend CompiledMethodLoad bug workaround to JDK < 11 2021-07-07 02:12:06 +03:00
Andrey Pangin
9b789f6516 Workaround JDK bug related to posting CompiledMethodLoad event 2021-07-07 00:28:34 +03:00
Andrey Pangin
6ddaf9ab71 Log messages in JFR. List native libraries in JFR. 2021-07-01 03:12:49 +03:00
Andrey Pangin
11131499ab One more fix of the race condition during VM shutdown 2021-06-30 02:47:23 +03:00
Andrey Pangin
d2abac1c30 Fixed 'check' command for multi-event profiling 2021-06-14 23:50:17 +03:00
Andrey Pangin
b7e9079b52 Fixed possible access to freed memory caused by a race during VM shutdown 2021-06-14 00:11:44 +03:00
Andrey Pangin
ff49ccccb7 #390: Fixes for macOS/M1 build. Sign macOS package. 2021-06-13 15:05:05 +03:00
Andrey Pangin
7dd075cca6 #440: Fixed crash when RetransformClasses is called with invalid arguments 2021-06-09 19:30:11 +03:00
Simon Legner
bd8078bc11 Enable GitHub Actions (#435) 2021-06-06 23:59:01 +03:00
Simon Legner
1622fe5d72 Travis CI: use Ubuntu 18.04 (Bionic Beaver) (#434) 2021-06-06 22:46:07 +03:00
Andrey Pangin
44d7941728 #390: Run profiler natively on Apple M1 chip 2021-05-30 20:07:15 +03:00
Andrey Pangin
d23b40048b Removed stackFrame_aarch64 (to be re-implemented from scratch) 2021-05-30 20:03:10 +03:00
Andrey Pangin
3b2db709ff #433: Skip AArch64 mapping symbols 2021-05-30 02:26:14 +03:00
Andrey Pangin
096fc88c82 Attempt to read temporary output file through /proc/[pid]/root 2021-05-17 01:45:40 +03:00
Yonatan Goldschmidt
4b0303916d Read log file via /proc/pid/root on Linux (#413) 2021-05-17 01:33:17 +03:00
Andrey Pangin
9fb2ca800a #414: Warn about conflict with another agent 2021-05-12 23:50:57 +03:00
Andrey Pangin
d917cfdb63 #415: Fixed escaping of ' character 2021-05-12 20:38:18 +03:00
Uri Baghin
f2006f3da1 Use time types instead of long. (#428) 2021-05-09 23:09:17 +03:00
Andrey Pangin
c30b22f204 #419: Count the specified argument of a function. Added munmap to the list of known functions 2021-05-03 03:04:27 +03:00
Andrey Pangin
f48ebcc72b #418: Cap Flame Graph height to 32767px 2021-05-03 00:10:55 +03:00
Andrey Pangin
339aee5cfc #379: Generate Flame Graph title depending on the event and --total 2021-05-02 23:37:15 +03:00
Andrey Pangin
cde3fae978 #423: Use Java 6 target only when building 'release' 2021-04-29 04:14:19 +03:00
Andrey Pangin
91eab91634 #414: Fixed infinite CodeCache growth 2021-04-11 22:44:00 +03:00
Andrey Pangin
3df00e3439 #150: jfr2flame can produce Allocation and Lock flame graphs 2021-04-11 21:47:15 +03:00
Andrey Pangin
c66ac2cfd0 Removed unused function 2021-04-04 21:15:08 +03:00
Andrei Pangin
f236482228 Raw PMU events; kprobes & uprobes (#406) 2021-03-28 23:35:08 +03:00
Andrey Pangin
3ff315ea8f Release 2.0 2021-03-14 20:19:13 +03:00
Andrey Pangin
308074a9eb Print error if JVM does not support Tool Interface (e.g. minimal VM) 2021-03-14 16:49:11 +03:00
Andrey Pangin
685da8d84f Update native thread names on Zing 2021-03-14 16:29:26 +03:00
Andrey Pangin
bd7bf9726e Add [unknown_Java] frame in safe mode 2021-03-14 15:22:34 +03:00
Andrey Pangin
034677435d Fixed handling of some profiler.sh arguments 2021-03-14 14:49:17 +03:00
Andrey Pangin
89f7d34456 Added new safe mode for troubleshooting 2021-03-14 01:03:41 +03:00
Andrey Pangin
ec8a40431a An alias for time-to-safepoint profiling: --ttsp 2021-03-13 01:17:19 +03:00
Andrey Pangin
81bc1f2df2 Support appending JFR profiles on 'resume' command 2021-03-13 00:46:43 +03:00
Andrey Pangin
40ff09a14f Combine async-profiler recording with JDK Flight Recording 2021-03-13 00:14:52 +03:00
Andrey Pangin
d6de541799 Corrected README links 2021-03-04 00:44:33 +03:00
Andrey Pangin
b807987f1d 2.0 Release Candidate 2021-03-04 00:22:42 +03:00
Andrey Pangin
02875138f1 Updated README 2021-03-03 23:52:30 +03:00
Andrey Pangin
d1c19d1904 Updated CHANGELOG. New FlameGraph example 2021-03-03 04:14:51 +03:00
Andrey Pangin
cc2307b92c 'cstack' option now correctly works in multievent profiles 2021-03-03 00:39:47 +03:00
Andrey Pangin
b5d89fef29 --begin/--end compatibility with allocation profiling 2021-03-02 05:11:46 +03:00
Andrey Pangin
646a92e2a0 Improved CLI experience 2021-03-02 02:35:29 +03:00
Andrey Pangin
bcd2375f39 Merge branch 'master' into v2.0
# Conflicts:
#	CHANGELOG.md
#	Makefile
#	README.md
2021-03-01 04:49:53 +03:00
Andrey Pangin
3cbe6aec2f Multievent fixes; 'alloc=bytes' and 'lock=duration' options 2021-03-01 04:41:00 +03:00
Andrey Pangin
2d51c07b23 Output profile in text format: traces + flat 2021-02-28 19:51:24 +03:00
Andrey Pangin
32ead969c1 Graceful handling of storage overflow 2021-02-28 00:38:00 +03:00
Andrey Pangin
de55fadbba JFR writer fixes 2021-02-25 00:33:20 +03:00
Andrey Pangin
da0ac08c64 Correct PerfEvents stack traces when begin/end options are set 2021-02-24 08:02:13 +03:00
Andrey Pangin
5dd9e86a1d Release 1.8.4 2021-02-24 02:50:58 +03:00
Andrey Pangin
81583b9af3 Merge branch 'master' into v2.0
# Conflicts:
#	src/profiler.cpp
2021-02-24 02:34:48 +03:00
Andrey Pangin
7ec5c195e7 Better error handling 2021-02-24 02:31:48 +03:00
Andrey Pangin
cb0f1eb72d Fixed JDK 7 crash during wall-clock profiling 2021-02-20 03:41:37 +03:00
Andrey Pangin
051424890a Recover stack traces below C1 Runtime stubs 2021-02-03 02:08:59 +03:00
Andrey Pangin
34daf4f540 #386: Added a note about IntelliJ IDEA 2021-02-01 19:11:59 +03:00
Andrey Pangin
3a44bb6ba6 Merge branch 'master' into v2.0 2021-01-29 02:05:55 +03:00
Andrey Pangin
f73ac36c9c Fixed symbol resolution when return address points beyond the function 2021-01-29 02:05:26 +03:00
Andrey Pangin
c94b1685cf Merge branch 'master' into v2.0 2021-01-29 01:33:50 +03:00
Andrey Pangin
90d4420d3f Make all symbols private by default for better compiler optimization 2021-01-29 01:32:37 +03:00
Andrey Pangin
a96501a26a Enable native stacks for non-signal events, e.g. lock profiling 2021-01-29 00:09:15 +03:00
Andrey Pangin
39f84be219 Write profiler settings in JFR 2021-01-27 00:12:29 +03:00
Andrey Pangin
4af327e2c1 Write JVM info, system properties, and profiler version in JFR 2021-01-26 02:43:46 +03:00
Andrey Pangin
61919df2ff Make symbols private by default to improve gcc optimizations 2021-01-26 02:42:43 +03:00
Andrey Pangin
26880ecb22 #93: Basic POM for publishing async-profiler Java API to Maven Central 2021-01-14 05:05:10 +03:00
Ivan Zemlyanskiy
af02f6b0fb Migrate documentation from README.md to Wiki (#383) 2021-01-14 00:32:31 +03:00
Andrey Pangin
c11d4ca487 Add OS and CPU information in JFR output 2021-01-12 05:07:02 +03:00
Andrey Pangin
b2dfe9b5b0 Fixed compilation on JDK 7 2021-01-10 20:51:02 +03:00
Andrey Pangin
5585a77355 Merged master->v2.0 2021-01-10 20:46:59 +03:00
Andrey Pangin
b5a67c2b95 Release 1.8.3 2021-01-06 17:53:37 +03:00
Andrey Pangin
9aea04a56a New safemode=32 for sanity check of top Java frames 2021-01-06 17:44:34 +03:00
Andrey Pangin
a48f77b380 #377: Fix JvmtiEnv::GetStackTrace problem after RedefineClasses 2020-12-24 03:05:01 +03:00
Andrey Pangin
8c5f6c1357 Gracefully stop profiler when terminating JVM 2020-12-24 02:58:38 +03:00
Andrey Pangin
88730d4388 Fixed possible deadlock on non-HotSpot JVMs 2020-12-24 02:57:56 +03:00
Andrey Pangin
d132777a60 #378: Create libasyncProfiler.dylib symlink on macOS 2020-12-10 01:12:40 +03:00
Andrey Pangin
04dac10d41 JFRv2 parser. Added JFR->FlameGraph converter; fixed FlameScope converter 2020-12-06 21:20:41 +03:00
Andrey Pangin
5290b81190 Attempt to recover stack trace from String.indexOf intrinsic 2020-11-15 23:38:42 +03:00
Andrey Pangin
93e1f963ef Links to v2.0 Early access 2020-11-09 04:51:48 +03:00
Andrey Pangin
a18af69f8b Minor build fixes 2020-11-09 04:50:02 +03:00
Andrey Pangin
60cac04c24 2.0-b1 Early Access 2020-11-09 04:33:35 +03:00
Andrey Pangin
3d7e8efd3b Changelog 2020-11-09 04:15:22 +03:00
Andrey Pangin
d26d69e550 Returned tree output format 2020-11-09 04:14:15 +03:00
Andrey Pangin
8160e49c14 Dump flat profile in text format 2020-11-07 04:04:18 +03:00
Andrey Pangin
731ac31064 Bias JFR buffers to threads. Distinguish TLAB/outside allocations in Flame Graph 2020-11-07 00:21:30 +03:00
Andrey Pangin
013ceee55d Resurrected FlameGraph and collapsed output formats 2020-11-05 04:54:19 +03:00
Andrey Pangin
f7ef0e97b2 Clean-room FlameGraph implementation. Removed 3rd party copyrighted code. No more CDDL license 2020-11-04 20:47:43 +03:00
Andrey Pangin
c01fe588ce Merge branch 'master' into v2.0 2020-11-04 18:32:58 +03:00
Andrey Pangin
e498ad27d2 Improved HTML FlameGraph performance 2020-11-04 04:29:36 +03:00
Andrey Pangin
edb31a0f79 Merge branch 'master' into v2.0
# Conflicts:
#	CHANGELOG.md
#	src/allocTracer.cpp
#	src/allocTracer.h
2020-11-02 03:54:29 +03:00
Andrey Pangin
13394b7125 Release 1.8.2 2020-11-02 02:54:32 +03:00
Andrey Pangin
d227a83e42 Fixed warnings on JDK 15 and 16 2020-11-02 02:16:41 +03:00
Andrey Pangin
7e8ad02ccb Fixed allocation sizes on JDK 8u262+ 2020-11-02 00:40:47 +03:00
Andrey Pangin
450f251732 Support 32-bit systems 2020-11-01 04:40:25 +03:00
Andrey Pangin
53ca190457 Merge branch 'master' into v2.0 2020-11-01 02:21:16 +03:00
Andrey Pangin
683144a907 Release 1.8.2 2020-11-01 00:58:29 +03:00
Andrey Pangin
02b65627cd Merge branch 'master' into v2.0
# Conflicts:
#	src/flightRecorder.cpp
#	src/profiler.h
2020-10-31 23:09:24 +03:00
Andrey Pangin
48e4fd5035 #363: New native libraries are not tracked in JDK 15 2020-10-31 22:46:38 +03:00
Andrey Pangin
642a1ac7fb Timers for macOS and Linux; jdk.CPULoad event 2020-10-31 02:54:12 +03:00
Andrey Pangin
114e711fd6 jdk.ActiveRecording event 2020-10-30 03:59:41 +03:00
Andrey Pangin
f833f41b46 jdk.CPULoad event 2020-10-30 03:10:21 +03:00
Andrey Pangin
a82163b703 Line numbers in JFR output 2020-10-26 01:15:58 +03:00
Andrey Pangin
6b49cfa9be JFRv2 2020-10-21 03:41:12 +03:00
Andrey Pangin
6c26e5ae69 Added link to AArch64 build 2020-10-18 17:37:44 +03:00
Andrey Pangin
1634380a16 Added tag for aarch64 release 2020-10-18 04:04:52 +03:00
Andrey Pangin
1a6e582ad7 #356: 'resume' command continues writing JFR instead of creating a new file 2020-10-17 23:44:27 +03:00
Andrey Pangin
4b5a17b336 #350: More careful native stack walking in wall-clock mode 2020-09-22 03:05:27 +03:00
Andrey Pangin
8392e568f4 #351: Updated README instructions to check libjvm debug symbols 2020-09-22 00:43:01 +03:00
Andrey Pangin
d7d56c762b Merge branch 'master' into v2.0
# Conflicts:
#	src/instrument.cpp
#	src/profiler.h
2020-09-14 21:54:47 +03:00
Andrey Pangin
a4c6d42677 Release 1.8.1 2020-09-05 01:02:19 +03:00
Andrey Pangin
b7e907884b Fixed profiler check command after #347 2020-09-04 04:26:05 +03:00
Andrey Pangin
5b69492dba #347: Do not read /proc/kallsyms when --all-user is specified 2020-09-04 02:14:59 +03:00
James Yuzawa
5a789bda42 Clean up debug symbols instructions and troubleshooting (#345) 2020-09-03 00:31:05 +03:00
James Yuzawa
a010f387b3 Specify JVM process by 'jps' application name (#346) 2020-09-03 00:17:46 +03:00
Andrey Pangin
61d5cdcd68 Profile multiple events from the command line 2020-08-30 03:02:43 +03:00
Andrey Pangin
2b14ee69ef #340: UnsatisfiedLinkError when attaching method profiler 2020-08-12 23:02:26 +03:00
Andrey Pangin
048b54621d Redo fix for #328 2020-08-12 23:02:05 +03:00
Andrey Pangin
94d406c531 Revert README 2020-08-11 22:37:10 +03:00
Andrey Pangin
800580bb30 Release 1.8 2020-08-10 23:18:20 +03:00
Andrey Pangin
8cecd2df9b #339: Report invalid interval argument 2020-08-09 18:50:59 +03:00
Andrey Pangin
d86883043a #330: Release packages should be extracted into separate folder 2020-08-09 18:35:26 +03:00
Andrey Pangin
d0772ba62c Added collapsed->FlameGraph converter 2020-08-09 16:06:13 +03:00
Andrey Pangin
d6d4a3c2a3 FlameGraph: skip empty lines 2020-08-06 23:07:28 +03:00
Andrey Pangin
49f9050bf5 FlameGraph minor cosmetic changes 2020-08-06 01:55:47 +03:00
Andrey Pangin
67b77b9645 Improved FlameGraph converter 2020-08-05 02:04:19 +03:00
Andrey Pangin
971fc85d1c FlameGraph 2.0 on HTML5 canvas 2020-08-04 23:26:04 +03:00
Andrey Pangin
50b9fe4d85 Merged with master 2020-08-04 19:52:27 +03:00
Jason Zaugg
f9db1099f9 Register natives one-by-one to support partial Java API implementations (#337) 2020-08-04 19:05:08 +03:00
Andrey Pangin
adce201837 #335: Fixed unsafe thread local storage access 2020-07-29 23:26:39 +03:00
Andrey Pangin
a905d50e00 #328, #14: Fixed long attach time and slow class loading 2020-07-29 22:20:45 +03:00
Andrey Pangin
f006e00443 async-profiler 2.0: Record cpu+alloc+lock in a single JFR. No framebuffer/hashtable limits. 2020-07-27 01:35:07 +03:00
Andrey Pangin
5ef449c2ed #335: Do not restart poll() calls with finite timeout 2020-07-20 00:14:48 +03:00
Andrey Pangin
d9ca3e42a8 #329: Support both ARM and THUMB flavors of JDK binaries 2020-06-19 02:37:19 +03:00
Claus F. Strasburger
269bef2867 profiler.sh: work on minimal systems (#303) 2020-06-15 00:28:30 +03:00
Andrey Pangin
e62cb2cfd1 #327: Per-thread reverse flamegraph / call tree 2020-05-31 08:34:13 +03:00
Andrey Pangin
7135840f70 Compilation fix 2020-05-25 01:32:53 +03:00
Andrey Pangin
31ddc2f562 #248: Converter to the format supported by FlameScope 2020-05-25 00:33:42 +03:00
Sergei Egorov
a5beee66ff Update the Java Agent options link to the latest release (#322) 2020-05-17 23:35:36 +03:00
Andrey Pangin
c15439348f javadoc comment 2020-05-17 15:15:36 +03:00
Andrey Pangin
17fe36e43e Release 1.7.1 2020-05-14 03:53:03 +03:00
Andrey Pangin
5312a793ec 'safemode' option to disable stack recovery techniques 2020-05-14 02:34:30 +03:00
Kirill Timofeev
4d43db91e1 Ensure code blob exists properly (#316)
Check that code blob is not removed to avoid returning NULL
when another code blob loaded at similar address range
that was used by removed one

Co-authored-by: Simon Ogorodnik <Simon.Ogorodnik@jetbrains.com>
2020-05-13 21:49:29 +03:00
Andrey Pangin
0020af54a3 LBR call stack support (--cstack lbr) 2020-04-21 23:47:53 +03:00
Andrey Pangin
f67d392ad8 Synthesize symbol names for PLT entries 2020-04-21 22:39:13 +03:00
Andrei Pangin
ff70da1736 Added --filter <thread-ids> for wall-clock profiling mode (#315) 2020-04-15 00:27:56 +03:00
Andrey ``Bass'' Shcheglov
07438daa70 Clean up the Makefile (#314) 2020-04-15 00:26:01 +03:00
Andrey Pangin
e1e8aa068a #310: Fixed crash on Zing 2020-03-27 18:20:12 +03:00
Andrey Pangin
a2691f919e #309: README paragraph about missed output file 2020-03-19 18:25:08 +03:00
Andrey Pangin
f496a167fe Release 1.7 2020-03-17 23:29:44 +03:00
Andrey Pangin
d9a1252550 JDK 14 compatibility: late load of libjava.so 2020-03-17 22:58:59 +03:00
Andrey Pangin
f3ca611267 Flush profiler output on 'version' command 2020-03-16 00:33:09 +03:00
Andrey Pangin
119da0fcb2 Replace unsafe calls to JVMTI GetStackTrace with manually patched AsyncGetCallTrace 2020-02-29 01:54:51 +03:00
Andrey Pangin
6bb7f749c9 Fixed build 2020-02-24 21:41:54 +03:00
Andrey Pangin
9593745098 #187: Filter stack traces by the given name pattern 2020-02-24 19:54:49 +03:00
Felix Barnsteiner
e891ecd9da Add NOTICE with CDDL license header (#301) 2020-02-19 20:20:23 +03:00
Andrey Pangin
b8493976b6 Version 1.7-ea3 2020-02-17 00:07:11 +03:00
Andrey Pangin
54b85dc718 'check' command to test if the specified event is available without starting profiler 2020-02-16 23:26:51 +03:00
Andrey Pangin
675a28fdc2 #300: Method invocation profiling by pattern: '-e java.io.File.*' 2020-02-16 20:52:53 +03:00
Andrey Pangin
1a4437999b Use different signals for cpu/wall/alloc to allow profiling multiple events at the same time 2020-02-16 20:00:59 +03:00
Andrey Pangin
ee2438e25f #277: Removed AsyncProfiler.getNativeThreadId() 2020-02-16 19:00:34 +03:00
Andrey Pangin
cd062fead9 Allow mangled function names, e.g. -e VMThread::execute 2020-02-16 18:11:02 +03:00
Andrey Pangin
156389f11a #133: An option to exclude native stack frames 2020-02-16 04:13:23 +03:00
Andrey Pangin
8373224395 #293: Allow shading of AsyncProfiler API (automatic discovery of the class with native methods) 2020-02-16 02:16:23 +03:00
Andrey Pangin
fe6c4ddeda Version 1.7-ea2 2020-02-13 03:15:28 +03:00
Andrey Pangin
98ac0c58d6 #281: Note about aqua/brown frames in an allocation Flame Graph 2020-02-13 02:35:45 +03:00
Andrey Pangin
b57106b858 Reduce the number of native methods in Java API 2020-02-13 02:16:52 +03:00
Andrey Pangin
c204e28348 #277: Record Java Thread ID in JFR output 2020-02-12 04:43:27 +03:00
Andrey Pangin
fc17386ec0 Display native thread names 2020-02-12 03:40:01 +03:00
Andrey Pangin
49d86abc6c #252: Record thread names in JFR output 2020-02-12 02:35:55 +03:00
Andrey Pangin
9fc97fc681 #279, #287, #296: Wall clock profiler improvements:
- stable interval
 - thread states (runnable vs. sleeping)
 - Java API to update set of monitored threads
2020-02-11 03:26:19 +03:00
Andrey Pangin
9b24fdef99 #290: Remove problematic optimization 2020-01-31 03:53:12 +03:00
Andrey Pangin
e37059d409 Version 1.7-ea 2020-01-27 01:49:06 +03:00
Andrey Pangin
869058b56b #295: Workaround for JDK 11 bug in ServerSocket.accept() 2020-01-27 01:28:47 +03:00
Andrey Pangin
11b0f4598e Correct stack traces when executing VM runtime code 2020-01-24 03:26:26 +03:00
Andrey Pangin
cc9cee7bec Fixed instrumentation of anonymous classes 2020-01-23 03:01:02 +03:00
Andrey Pangin
5ae46d2312 #290: CodeCache for compiled methods and runtime stubs must be handled separately 2020-01-21 03:34:52 +03:00
Andrey Pangin
776b5597bf #286: Enable CPU profiling on WSL 2020-01-07 20:38:00 +03:00
Andrey Pangin
e282b76880 #286: Add an error message if ITIMER_PROF is not supported 2020-01-07 19:14:18 +03:00
Andrey Pangin
e47d7f408f #277: getNativeThreadId() now returns tid of the current Thread 2020-01-07 18:36:19 +03:00
KUBOTA Yuji
5044869ecd Make release version of javac configurable (#285) 2020-01-06 18:59:58 +03:00
Andrey Pangin
9516f54311 Better C stack in allocation profiling mode 2020-01-06 05:17:56 +03:00
Andrey Pangin
9bd414411f --cstack option to collect native stack traces for Java-level events 2020-01-06 04:36:35 +03:00
Andrei Pangin
0334c5900e Profile invocations of an arbitrary Java method
I.e. record all stack traces, where the methods is called.
2020-01-06 03:20:31 +03:00
Andrey Pangin
f502979135 Enable allocation profiling on Zing 2019-12-23 00:34:04 +03:00
Andrey Pangin
d89fc3fbdc Java thread <-> VMThread bridge 2019-12-22 23:51:23 +03:00
Andrey Pangin
9279531cf8 #277: Java API for getting native thread ID 2019-12-22 21:59:26 +03:00
Per Lundberg
9de1a63542 README.md: minor grammar fixes (#283) 2019-12-18 14:07:25 +03:00
Andrey Pangin
21af257716 #271: Reduce the amount of unknown_Java even more 2019-12-17 01:49:06 +03:00
Andrey Pangin
28ed6f490e Intercept Thread.setNativeName 2019-12-17 01:34:57 +03:00
Andrey Pangin
dc4f01dd14 Removed syncwalk argument (no longer needed) 2019-12-13 03:57:58 +03:00
Andrey Pangin
1e3a4b77ee #271: Further reduce the amount of unknown/not_walkable frames 2019-12-13 03:52:00 +03:00
Andrey Pangin
4cec7a3bb0 #271: Reduce the amount of unknown_Java frames 2019-12-12 02:45:51 +03:00
tomgoren
2557363892 Update README with elevated permissions syntax updates and extra Docker runtime flag (#270) 2019-11-28 01:36:10 +03:00
Andrey Pangin
78035134f4 #263: Replace non-printable characters in function names 2019-11-23 18:22:18 +03:00
Andrey Pangin
0ef1122a3b #266: Fix [unknown] frames due to kptr_restrict 2019-11-17 05:36:47 +03:00
RoySunnySean007
8bb57de1d1 #265: Update README.md about installing debuginfo package 2019-11-14 13:09:59 +03:00
Andrey Pangin
11d74b73af #262: Fixed NativeLibrary.load0 signature on JDK 9+ (NoSuchMethodError) 2019-10-28 02:02:55 +03:00
Andrei Pangin
a759960bb0 Removed the note about reducing TLAB size
Reducing TLAB size in production is discouraged.
2019-10-26 04:29:26 +03:00
Andrey Pangin
7edcd2660a #256: Fixed crash on Zing JVM 2019-10-17 04:51:37 +03:00
Andrey Pangin
a97a5cae13 #255: Truncate too long signatures 2019-10-15 02:47:13 +03:00
Dmitry Timofeev
d2e7e2718c Document the placeholders in the file name (#254) 2019-10-12 18:51:08 +03:00
Andrey Pangin
e1c3100c60 Fixed lock profiling on some macOS JDK builds 2019-10-04 19:29:47 +03:00
Andrey Pangin
93c63d50d5 --sync-walk option to use alternative stack walker in expert mode 2019-09-30 21:04:42 +03:00
Andrey Pangin
7e6db636d8 #250: Print error message when failed to parse symbols due to the OS bug 2019-09-29 22:04:57 +03:00
Andrey Pangin
78a83a31b2 #250: Fixed mmap bug when parsing symbols 2019-09-28 12:07:39 +03:00
Andrey Pangin
adcf89234b Release 1.6 2019-09-09 16:52:39 +03:00
Andrey Pangin
b7e9e6b955 #192: Pause/resume profiling 2019-09-07 13:05:39 +03:00
Andrey Pangin
19e16dc973 #211: The agent autodetects output format by the filename 2019-09-03 03:38:20 +03:00
Andrey Pangin
84602f8660 Added download links to 1.6-ea 2019-09-02 03:28:29 +03:00
402 changed files with 48507 additions and 8194 deletions

View File

@@ -0,0 +1,855 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<style>
:root {--bg: #ffffff; --fg: #000000; --hl-bg: #ffffe0; --hl-border: #ffc000; --link: #0366d6; --legend-bg: #ffffe0; --legend-border: #666666}
:root.dark {--bg: #1e1e1e; --fg: #cccccc; --hl-bg: #3a3a00; --hl-border: #8a7000; --link: #58a6ff; --legend-bg: #333333; --legend-border: #888888}
body {margin: 0; padding: 10px 10px 22px 10px; background-color: var(--bg); color: var(--fg)}
h1 {margin: 5px 0 0 0; font-size: 18px; font-weight: normal; text-align: center}
header {margin: -22px 0 6px 0}
button {border: none; background: none; width: 24px; height: 24px; cursor: pointer; margin: 0; padding: 2px 0 0 0; text-align: center}
button:hover {background-color: var(--hl-bg); outline: 1px solid var(--hl-border); border-radius: 4px}
dl {margin: 0 4px 8px 4px}
dt {margin: 1px; padding: 2px 0; font-weight: bold}
dd {margin: 1px; padding: 2px 4px}
dl.frames {float: left; width: 160px}
dl.frames > dd {color: #000000}
dl.hotkeys {clear: left; border-top: 1px solid var(--legend-border)}
dl.hotkeys > dt {float: left; clear: left; width: 158px; margin-right: 4px; text-align: right}
dl.hotkeys > dd {float: left}
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid var(--hl-border); display: none; overflow: hidden; white-space: nowrap; background-color: var(--hl-bg); color: var(--fg)}
a {color: var(--link)}
#legend {padding: 4px; border-radius: 4px; background: var(--legend-bg); border: 1px solid var(--legend-border); display: none}
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: var(--hl-bg); outline: 1px solid var(--hl-border); height: 15px}
#hl span {padding: 0 3px 0 3px}
#status {left: 0}
#match {right: 0}
#reset {cursor: pointer}
#canvas {width: 100%; height: 576px}
</style>
<script>
{
    // Runs from <head>, before the body is parsed, so the 'dark' class is already
    // on <html> when the page is first painted.
    let savedTheme = null;
    try {
        // Reading localStorage may throw; in that case fall through to the OS preference.
        savedTheme = localStorage.getItem('flame-theme');
    } catch (ignored) {}

    // An explicitly saved choice wins; otherwise follow prefers-color-scheme.
    const useDark = savedTheme
        ? savedTheme === 'dark'
        : matchMedia('(prefers-color-scheme: dark)').matches;
    if (useDark) document.documentElement.classList.add('dark');
}
</script>
</head>
<body style='font: 12px Verdana, sans-serif'>
<h1>CPU profile</h1>
<header style='float: left'>
<button id='inverted' title='Invert (I)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 392 392'><path d='M196,36 L316,156 L76,156 Z' fill='#004d80'/><path d='M196,356 L76,236 L316,236 Z' fill='#004d80'/><path d='M196,54 L298,156 L94,156 Z' fill='#ff8d40'/><path d='M196,338 L94,236 L298,236 Z' fill='#40b2ff'/><rect x='94' y='188' width='204' height='16' fill='#004d80'/></svg></button>
<button id='search' title='Search (Ctrl+F)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='-39.3 -39.3 471.1 471.1'><circle cx='147.7' cy='147.8' r='125.9' fill='#fff'/><path fill='#40b2ff' d='M370.7 348.7c0 1.4-1.6 6.3-7.2 12.3-6.2 6.7-12.5 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6z'/><path fill='#ff8d40' d='M208.7 86.9l-14.5 14.5c-17.1 17.1-46.5 5-46.5-19.3V61.6c-49 0-88.4 40.8-86.1 90.2 2 43.9 38.1 80 82 82 49.5 2.3 90.2-37.2 90.2-86.1 0-23.7-9.6-45.2-25.1-60.8z'/><path fill='#004d80' d='M276.1 221c12.3-21.5 19.5-46.5 19.5-73.2C295.6 66.3 229.2.1 147.7.1S0 66.3 0 147.9s66.3 147.7 147.7 147.7c26.6 0 51.5-7.1 73.2-19.5 39.8 53.3 91.9 113.5 126.1 116.4 12.3.5 22.9-6.7 32.8-16.7 5.2-5.6 13.8-16.9 12.8-28.8-2.9-34.1-63.1-86.2-116.4-126.1zM147.7 273.8c-69.5 0-125.9-56.5-125.9-125.9S78.3 21.9 147.7 21.9 273.6 78.4 273.6 147.8s-56.4 126-125.9 126zm215.9 87.2c-6.2 6.7-12.4 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6 0 1.4-1.6 6.3-7.2 12.4z'/></svg></button>
<button id='darkmode' title='Toggle dark mode (D)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><path d='M10 4a6 6 0 0 1 0 12z' fill='#ff8d40'/><path d='M10 4a6 6 0 0 0 0 12z' fill='#ffffff'/><circle cx='10' cy='10' r='8' fill='none' stroke='#004d80'/></svg></button>
<button id='info'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><circle cx='10' cy='10' r='8' stroke='#004d80' fill='none'/><path d='M10 5.5c-1.25 0-2.25 1-2.25 2.25H9a1.25 1.25 0 0 1 2.5 0c0 .65-.55 1-1 1.2-.7.35-1.25.85-1.25 1.8V11h1.5v-.25c0-.37.29-.65.68-.83.73-.34 1.32-.87 1.32-2.17 0-1.25-1.5-2.25-2.75-2.25' fill='#ff8d40' stroke='#ff8d40' stroke-width='.6' stroke-linecap='round' stroke-linejoin='round'/><circle cx='10' cy='13.5' r='1.2' fill='#ff8d40'/></svg></button>
</header>
<header style='float: right'>Produced by <a href='https://github.com/async-profiler/async-profiler'>async-profiler</a></header>
<div id='legend' style='position: absolute'>
<dl class='frames'>
<dt>Frame types</dt>
<dd style='background-color: #e17d00'>Kernel</dd>
<dd style='background-color: #e15a5a'>Native</dd>
<dd style='background-color: #c8c83c'>C++ (VM)</dd>
<dd style='background-color: #50e150'>Java compiled</dd>
<dd style='background-color: #cce880'>Java compiled by C1</dd>
<dd style='background-color: #50cccc'>Inlined</dd>
<dd style='background-color: #b2e1b2'>Interpreted</dd>
</dl>
<dl class='frames'>
<dt>Allocation profile</dt>
<dd style='background-color: #50cccc'>Allocated class</dd>
<dd style='background-color: #e17d00'>Allocation outside TLAB</dd>
<dt>Lock profile</dt>
<dd style='background-color: #50cccc'>Lock class</dd>
<dt>&nbsp;</dt>
<dt>Search</dt>
<dd style='background-color: #ee00ee'>Matches regexp</dd>
</dl>
<dl class='hotkeys'>
<!-- Mouse and keyboard shortcuts. Keep this list in sync with the window.onkeydown handler. -->
<dt>Click frame</dt><dd>Zoom into frame</dd>
<dt>Ctrl/Alt+Click</dt><dd>Remove stack</dd>
<dt>0</dt><dd>Reset zoom</dd>
<dt>I</dt><dd>Invert graph</dd>
<dt>D</dt><dd>Toggle dark mode</dd>
<dt>Ctrl+F</dt><dd>Search</dd>
<dt>N</dt><dd>Next match</dd>
<dt>Shift+N</dt><dd>Previous match</dd>
<dt>Esc</dt><dd>Cancel search</dd>
</dl>
</div>
<canvas id='canvas'></canvas>
<div id='hl'><span></span></div>
<p id='status'></p>
<p id='match'>Matched: <span id='matchval'></span> <span id='reset' title='Clear'>&#x274c;</span></p>
<script>
// Copyright The async-profiler authors
// SPDX-License-Identifier: Apache-2.0
'use strict';
// root    - frame the graph is currently zoomed into (assigned by render())
// px      - horizontal scale: canvas pixels per unit of frame width (assigned by render())
// pattern - RegExp of the active search, or undefined when no search is active
let root, px, pattern;
// Running state of the frame decoder f()/u()/n(): level, left offset and width of the
// most recently added frame. `d` feeds the diff colour and the ", +N" detail text in f();
// NOTE(review): nothing in this file assigns `d` after initialisation - presumably only
// differential profiles emit code that does.
let level0 = 0, left0 = 0, width0 = 0, d = 0;
// nav      - frames matched by the last search, in left-to-right order
// navIndex - position inside nav used by the N / Shift+N hotkeys
// matchval - text of the "Matched: NN%" indicator
let nav = [], navIndex, matchval;
let inverted = false;
// Shorthand used by getDiffColor() to test for a missing diff value.
const U = undefined;
// A negative maxdiff disables diff colouring: f() calls getDiffColor() only when maxdiff >= 0.
const maxdiff = -1;
// One array of frames per stack depth; each depth is drawn as a 16px row
// (36 rows * 16px = 576px, the canvas height set in the stylesheet).
const levels = Array(36);
for (let h = 0; h < levels.length; h++) {
levels[h] = [];
}
const canvas = document.getElementById('canvas');
const c = canvas.getContext('2d');
const hl = document.getElementById('hl');
const status = document.getElementById('status');
// Freeze the layout size in CSS pixels, then size the backing store in device pixels
// and scale the context so that all drawing code keeps working in CSS pixels.
const canvasWidth = canvas.offsetWidth;
const canvasHeight = canvas.offsetHeight;
canvas.style.width = canvasWidth + 'px';
canvas.width = canvasWidth * (devicePixelRatio || 1);
canvas.height = canvasHeight * (devicePixelRatio || 1);
if (devicePixelRatio) c.scale(devicePixelRatio, devicePixelRatio);
// Reuse the font declared in the inline style of <body>.
c.font = document.body.style.font;
// Frame colours, indexed by the low 3 bits of a frame key (see f()).
// Each row is [base RGB, max random increment for R, for G, for B]; see getColor().
// The base colours correspond to the legend entries noted on each row.
const palette = [
[0xb2e1b2, 20, 20, 20],  // Interpreted
[0x50e150, 30, 30, 30],  // Java compiled
[0x50cccc, 30, 30, 30],  // Inlined
[0xe15a5a, 30, 40, 40],  // Native
[0xc8c83c, 30, 30, 10],  // C++ (VM)
[0xe17d00, 30, 30, 0],   // Kernel
[0xcce880, 20, 20, 20],  // Java compiled by C1
];
// Returns a '#rrggbb'-style colour for palette row `p`: the base colour p[0] plus a random
// brightening of each channel, bounded by p[1] (red), p[2] (green) and p[3] (blue).
// A single random factor is shared by all three channels.
function getColor(p) {
    const jitter = Math.random();
    const red = (p[1] * jitter) << 16;
    const green = (p[2] * jitter) << 8;
    const blue = p[3] * jitter;
    // The bitwise OR truncates the fractional blue part to an integer.
    return '#' + (p[0] + (red | green | blue)).toString(16);
}
// Maps a diff value to a colour for differential graphs:
//   undefined -> yellow, 0 -> grey, positive -> shade of red, negative -> shade of blue.
// The larger |diff| is relative to maxdiff, the lower the shared channel value,
// i.e. the more saturated the red or blue.
function getDiffColor(diff) {
    if (diff === U) {
        return '#ffdd33';
    }
    if (diff === 0) {
        return '#e0e0e0';
    }
    const shade = Math.round(128 * (maxdiff - Math.abs(diff)) / maxdiff) + 96;
    if (diff > 0) {
        return `rgb(255,${shade},${shade})`;
    }
    return `rgb(${shade},${shade},255)`;
}
// Appends one frame to levels[level] and remembers it as the "previous frame".
//   key   - (constant pool index << 3) | palette index
//   level - stack depth of the frame
//   left  - offset added to the running left position left0
//   width - frame width; when 0 or omitted, the previous frame's width is reused
//   inln, c1, int - optional counters shown in the tooltip as ", inln=", ", c1=", ", int="
function f(key, level, left, width, inln, c1, int) {
    // Advance the decoder state first; the new frame is described by it.
    level0 = level;
    left0 += left;
    width0 = width || width0;

    let details = '';
    if (d) {
        details += (d > 0 ? ', +' : ', ') + d;
    }
    if (int) {
        details += ', int=' + int;
    }
    if (c1) {
        details += ', c1=' + c1;
    }
    if (inln) {
        details += ', inln=' + inln;
    }

    levels[level].push({
        level,
        left: left0,
        width: width0,
        color: maxdiff >= 0 ? getDiffColor(d) : getColor(palette[key & 7]),
        title: cpool[key >>> 3],
        details
    });
}
// Adds a frame one level deeper than the previously added frame (level0 + 1),
// starting at the same left offset (left delta 0). Arguments are as for f();
// an omitted width reuses the previous frame's width.
function u(key, width, inln, c1, int) {
f(key, level0 + 1, 0, width, inln, c1, int)
}
// Adds a frame at the same level as the previously added frame, starting where that
// frame ended (left delta = previous width, width0). Arguments are as for f().
function n(key, width, inln, c1, int) {
f(key, level0, width0, width, inln, c1, int)
}
// Formats a sample count for display: '1 sample' for exactly 1,
// otherwise the number with comma thousands separators followed by ' samples'.
function samples(n) {
    if (n === 1) {
        return '1 sample';
    }
    // Insert a comma at every position that is followed by a multiple of three digits.
    const grouped = n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ',');
    return grouped + ' samples';
}
// Returns a as a percentage of b, as a string with two decimals.
// Anything at or above b is reported as exactly '100' (no decimals).
function pct(a, b) {
    if (a >= b) {
        return '100';
    }
    const ratio = 100 * a / b;
    return ratio.toFixed(2);
}
// Binary-searches `frames` (ordered by `left`) for the frame whose range
// [left, left + width) contains x. If none contains x, a neighbouring frame is still
// returned when x is less than half a pixel away from it; otherwise returns null.
function findFrame(frames, x) {
    let lo = 0;
    let hi = frames.length - 1;
    while (lo <= hi) {
        const mid = (lo + hi) >>> 1;
        const candidate = frames[mid];
        if (x < candidate.left) {
            hi = mid - 1;
        } else if (x >= candidate.left + candidate.width) {
            lo = mid + 1;
        } else {
            return candidate;
        }
    }

    // No exact hit: lo is the first frame to the right of x, hi the last one to the left.
    const after = frames[lo];
    if (after && (after.left - x) * px < 0.5) {
        return after;
    }
    const before = frames[hi];
    if (before && (x - (before.left + before.width)) * px < 0.5) {
        return before;
    }
    return null;
}
// Cuts the horizontal range [left, left + width) out of every level:
//   - frames entirely to the right of the range are shifted left by `width`;
//   - frames overlapping the range shrink by `width`, and are dropped when nothing
//     remains, except on level 0 where the frame is always kept;
//   - frames entirely to the left are untouched.
function removeStack(left, width) {
    const end = left + width;
    for (let depth = 0; depth < levels.length; depth++) {
        const kept = [];
        for (const frame of levels[depth]) {
            if (frame.left >= end) {
                frame.left -= width;
            } else if (frame.left + frame.width > left) {
                frame.width -= width;
                if (frame.width <= 0 && depth) {
                    continue;
                }
            }
            kept.push(frame);
        }
        levels[depth] = kept;
    }
}
// Starts, updates or cancels a search and repaints the graph.
//   r === true      - ask the user for a regexp; a cancelled prompt changes nothing
//   r is a string   - search for that regexp
//   r falsy         - clear the current search and hide the "Matched" indicator
// An invalid regexp is reported to the user and leaves the current search untouched.
function search(r) {
    if (r === true && (r = prompt('Enter regexp to search:', '')) === null) {
        return;
    }

    let compiled;
    if (r) {
        try {
            compiled = RegExp(r);
        } catch (e) {
            // RegExp() throws SyntaxError on a malformed pattern; without this the
            // handler would die silently and the user would get no feedback.
            alert('Invalid regexp: ' + e.message);
            return;
        }
    }
    pattern = compiled;

    // Re-render with the current root; render() refills `nav` with the matched frames.
    const matched = render(root, nav = []);
    navIndex = -1;
    document.getElementById('matchval').textContent = matchval = pct(matched, root.width) + '%';
    document.getElementById('match').style.display = r ? 'inline-block' : 'none';
}
// Repaints the whole canvas zoomed into `newRoot` (defaults to the level-0 frame).
//   newRoot - frame to zoom into; becomes the global `root`
//   nav     - optional array that receives the frames matching the current search
//             `pattern`, ordered by left offset
// Returns the total width of matched frames, counting overlapping matches only once.
function render(newRoot, nav) {
const bg = getComputedStyle(document.documentElement).getPropertyValue('--bg');
// Clear the previous drawing; skipped on the very first call, when `root` is still unset.
if (root) {
c.fillStyle = bg;
c.fillRect(0, 0, canvasWidth, canvasHeight);
}
root = newRoot || levels[0][0];
// Horizontal scale: canvas pixels per unit of frame width.
px = canvasWidth / root.width;
// Visible horizontal range, in frame units.
const x0 = root.left;
const x1 = x0 + root.width;
// Matched frames keyed by their left offset (the array is used as a sparse map).
const marked = [];
// Remembers the first matched frame seen at a given left offset. Levels are drawn from 0
// upwards, so that is the lowest-level match at this offset. Always returns a truthy frame.
function mark(f) {
return marked[f.left] || (marked[f.left] = f);
}
// Sums the widths of marked frames from left to right, skipping any mark that starts
// inside a frame that has already been counted.
function totalMarked() {
let total = 0;
let left = 0;
Object.keys(marked).sort(function(a, b) { return a - b; }).forEach(function(x) {
if (+x >= left) {
const m = marked[x];
if (nav) nav.push(m);
total += m.width;
left = +x + m.width;
}
});
return total;
}
// Draws one frame in the row whose top is at `y`, if it overlaps the visible range.
function drawFrame(f, y) {
if (f.left < x1 && f.left + f.width > x0) {
// Search hits are painted magenta; mark(f) also records the hit as a side effect.
c.fillStyle = pattern && f.title.match(pattern) && mark(f) ? '#ee00ee' : f.color;
c.fillRect((f.left - x0) * px, y, f.width * px, 15);
// Label only frames at least 21px wide; the text is cut assuming about 7px per character.
if (f.width * px >= 21) {
const chars = Math.floor(f.width * px / 7);
const title = f.title.length <= chars ? f.title : f.title.substring(0, chars - 2) + '..';
c.fillStyle = '#000000';
c.fillText(title, Math.max(f.left - x0, 0) * px + 3, y + 12, f.width * px - 6);
}
// Frames below the zoom root are faded by overlaying the background colour with an
// appended alpha byte. NOTE(review): relies on --bg being a 6-digit hex colour.
if (f.level < root.level) {
c.fillStyle = bg + '80';
c.fillRect((f.left - x0) * px, y, f.width * px, 15);
}
}
}
for (let h = 0; h < levels.length; h++) {
// Rows are 16px apart: level 0 is at the top when inverted, at the bottom otherwise.
const y = inverted ? h * 16 : canvasHeight - (h + 1) * 16;
const frames = levels[h];
for (let i = 0; i < frames.length; i++) {
drawFrame(frames[i], y);
}
}
return totalMarked();
}
// Expands the prefix-compressed constant pool in place. In every entry after the first,
// the first character encodes (char code - 32) how many leading characters to copy from
// the previous, already expanded entry; the rest of the entry is the new suffix.
function unpack(cpool) {
    let previous = cpool[0];
    for (let i = 1; i < cpool.length; i++) {
        const packed = cpool[i];
        const sharedLength = packed.charCodeAt(0) - 32;
        previous = previous.substring(0, sharedLength) + packed.substring(1);
        cpool[i] = previous;
    }
}
// Hover handling: highlights the frame under the pointer, shows its tooltip and status
// line, and installs the click handler for that frame.
// The handler reads the global `event` instead of taking a parameter, so that it can also
// be re-invoked without arguments (as done after clicks and after key navigation).
canvas.onmousemove = function() {
// Row index under the pointer; rows are 16px tall and counted from the bottom unless inverted.
const h = Math.floor((inverted ? event.offsetY : (canvasHeight - event.offsetY)) / 16);
if (h >= 0 && h < levels.length) {
const f = findFrame(levels[h], event.offsetX / px + root.left);
if (f) {
if (f !== root) getSelection().removeAllRanges();
// Position the highlight box over the visible part of the frame.
hl.style.left = (Math.max(f.left - root.left, 0) * px + canvas.offsetLeft) + 'px';
hl.style.width = (Math.min(f.width, root.width) * px) + 'px';
hl.style.top = ((inverted ? h * 16 : canvasHeight - (h + 1) * 16) + canvas.offsetTop) + 'px';
hl.firstChild.textContent = f.title;
hl.style.display = 'block';
// The percentage is relative to the level-0 frame, not to the current zoom root.
canvas.title = f.title + '\n(' + samples(f.width) + f.details + ', ' + pct(f.width, levels[0][0].width) + '%)';
canvas.style.cursor = 'pointer';
canvas.onclick = function() {
// Ctrl/Alt+click removes the stack under the frame; only allowed at or above the
// zoom root and never for level 0.
if ((event.altKey || event.ctrlKey) && h >= root.level && h > 0) {
removeStack(f.left, f.width);
// removeStack() has shrunk both frames: keep the current root if it still has
// width left, otherwise fall back to the whole graph.
root.width > f.width ? render(root) : render();
} else if (f !== root) {
// Plain click zooms into the frame.
render(f);
}
// Refresh the hover state for whatever is now under the pointer.
canvas.onmousemove();
};
status.textContent = 'Function: ' + canvas.title;
status.style.display = 'inline-block';
return;
}
}
// Pointer is not over any frame: clear the hover state.
canvas.onmouseout();
}
// Clears all hover state: hides the highlight box and the status line, removes the
// tooltip and pointer cursor, and disables clicking until a frame is hovered again.
canvas.onmouseout = function() {
    for (const overlay of [hl, status]) {
        overlay.style.display = 'none';
    }
    canvas.title = '';
    canvas.style.cursor = '';
    canvas.onclick = null;
};
// Double click selects the text of the highlight box, i.e. the hovered frame's title.
canvas.ondblclick = function() {
    const selection = getSelection();
    selection.selectAllChildren(hl);
};
// Toolbar buttons. The 'inverted' and 'darkmode' handlers are also called directly by
// the keyboard shortcuts, so they must not depend on an event argument.

// Flip the graph upside down; render() without arguments also resets the zoom.
document.getElementById('inverted').onclick = function() {
    inverted = !inverted;
    render();
};

// Ask the user for a regexp and highlight the matches.
document.getElementById('search').onclick = function() {
    search(true);
};

// The "Clear" cross next to the match indicator cancels the search.
document.getElementById('reset').onclick = function() {
    search(false);
};

// Switch between light and dark theme, remember the choice, and repaint with the
// current zoom root so the canvas picks up the new background colour.
document.getElementById('darkmode').onclick = function() {
    const nowDark = document.documentElement.classList.toggle('dark');
    try {
        // Persisting the choice is best effort; storage may be unavailable.
        localStorage.setItem('flame-theme', nowDark ? 'dark' : 'light');
    } catch (ignored) {}
    render(root);
};
// Legend popup: shown while the pointer is over the info button, anchored 24px
// (the button size from the stylesheet) right of and below the button's top-left corner.
const btnInfo = document.getElementById('info');
const legend = document.getElementById('legend');
btnInfo.onmouseover = function() {
    const offset = 24;
    legend.style.left = `${btnInfo.offsetLeft + offset}px`;
    legend.style.top = `${btnInfo.offsetTop + offset}px`;
    legend.style.display = 'block';
};
btnInfo.onmouseout = function() {
    legend.style.display = 'none';
};
// Keyboard shortcuts:
//   Ctrl/Cmd+F - search        Esc - cancel search
//   N / Shift+N - next / previous match
//   I - invert graph           D - toggle dark mode        0 - reset zoom
// Returns false for handled keys to suppress the browser's default action.
window.onkeydown = function(event) {
    // Letter keys are compared case-insensitively so the hotkeys keep working with
    // CapsLock or Shift; named keys such as 'Escape' are left untouched.
    const key = typeof event.key === 'string' && event.key.length === 1
        ? event.key.toLowerCase()
        : event.key;

    if ((event.ctrlKey || event.metaKey) && key === 'f') {
        event.preventDefault();
        search(true);
        return false;
    }
    if (key === 'Escape') {
        search(false);
        return;
    }
    // Single-key hotkeys must not hijack browser shortcuts such as Ctrl+0 (reset page
    // zoom) or Ctrl+D (bookmark): ignore any other key pressed with Ctrl, Cmd or Alt.
    if (event.ctrlKey || event.metaKey || event.altKey) {
        return;
    }

    if (key === 'n' && nav.length > 0) {
        const step = event.shiftKey ? nav.length - 1 : 1;
        // navIndex is -1 until the first navigation; "previous" from that state should
        // wrap to the last match rather than to the second-to-last one.
        const from = navIndex < 0 && event.shiftKey ? 0 : navIndex;
        navIndex = (from + step) % nav.length;
        render(nav[navIndex]);
        document.getElementById('matchval').textContent = matchval + ' (' + (navIndex + 1) + ' of ' + nav.length + ')';
        // Scroll the page so that the row of the selected match is in view.
        window.scroll(0, inverted ? root.level * 16 : canvasHeight - (root.level + 1) * 16);
        canvas.onmousemove();
        return false;
    }
    if (key === 'i') {
        canvas.onmouseout();
        document.getElementById('inverted').onclick();
        return false;
    }
    if (key === 'd') {
        document.getElementById('darkmode').onclick();
        return false;
    }
    if (key === '0') {
        // Reset the zoom and clear any active search.
        canvas.onmouseout();
        root = levels[0][0];
        search(false);
        return false;
    }
};
const cpool = [
'all',
' C2Compiler::compile_method',
'!ompilation::Compilation',
'-compile_java_method',
'5method',
'-emit_code_body',
'&e::Code_Gen',
'+mpile',
')Optimize',
'\'Broker::compiler_thread_loop',
'/invoke_compiler_on_method',
'\'r::compile_method',
'"ntiguousSpace::allocate',
' DefNewGeneration::FastEvacuateFollowersClosure::do_void',
'2collect',
'4py_to_survivor_space',
' GenCollectedHeap::collect_generation',
'2do_collection',
'2satisfy_failed_allocation',
'#eration::promote',
' InstanceKlass::allocate_objArray',
'"terpreterRuntime::anewarray',
' JVM_ArrayCopy',
'!avaThread::run',
'$_sun_nio_ch_FileDispatcherImpl_read0',
' Matcher::match',
'!emAllocator::allocate',
' ObjArrayAllocator::initialize',
'!ffsetTableContigSpace::allocate',
' Parse::Parse',
'\'do_all_blocks',
'*call',
'*one_block',
'/ytecode',
'%Generator::generate',
'!haseCFG::do_global_code_motion',
'*global_code_motion',
'*schedule_late',
'4ocal',
'&haitin::Register_Allocate',
'.Split',
'.build_ifg_physical',
'.elide_copy',
'.interfere_with_live',
'.merge_multidefs',
'.post_allocate_copy_removal',
'%IdealLoop::Dominators',
'0build_and_optimize',
'6loop_early',
';late',
';tree',
'0optimize',
'0remix_address_expressions',
'0split_if_with_blocks',
'D_post',
'Fre',
'&terGVN::optimize',
'.subsume_node',
'.transform_old',
'%Live::add_liveout',
'+compute',
'%MacroExpand::expand_macro_nodes',
'!redictedCallGenerator::generate',
' TenuredGeneration::allocate',
'!hread::call_run',
' VMThread::evaluate_operation',
'*inner_execute',
'*run',
'"_GenCollectForAllocation::doit',
'#Operation::evaluate',
' __GI_read',
'"handle_mm_fault',
'"memcpy_sse2_unaligned_erms',
'%set_avx2_unaligned_erms',
' aci_CopyRight',
'!sm_exc_page_fault',
' clear_huge_page',
'&page_erms',
'&subpage',
'"one3',
'!opy_page_to_iter',
'%user_enhanced_fast_string',
' demo8/FileConverter$$Lambda$3.0x00007ffab9001000.apply',
'<4.0x00007ffab9001240.applyAsInt',
'4Entry.<init>',
':equals',
':hashCode',
'3.convertFile',
';List',
'4main',
'4readInput',
'4saveResult',
'!o_huge_pmd_anonymous_page',
'#syscall_64',
'#user_addr_fault',
' entry_SYSCALL_64_after_hwframe',
'!xc_page_fault',
' filemap_read',
' handle_mm_fault',
' java/io/BufferedReader.fill',
'7readLine',
')yteArrayOutputStream.ensureCapacity',
'>toByteArray',
'>write',
'(DataOutputStream.write',
'>Int',
'>UTF',
'(InputStreamReader.read',
'%lang/Integer.parseInt',
'*String.<init>',
'1decodeASCII',
'1hashCode',
'1length',
'1substring',
'0Latin1.hashCode',
'7newString',
'0UTF16.compress',
'+ystem$2.decodeASCII',
'0.arraycopy',
'*ThreadLocal.get',
'%nio/charset/CharsetDecoder.decode',
')file/Files.readAllLines',
'%util/ArrayList$ArrayListSpliterator.tryAdvance',
'3.add',
'4grow',
'4sort',
'/s.copyOf',
'7Range',
'1sort',
'*Comparator$$Lambda$5.0x00007ffab90494b0.compare',
'4.lambda$comparingInt$7b0bb60$1',
'*HashMap$Node.<init>',
'1.hash',
'2newNode',
'2put',
'5Val',
'2resize',
'.Set.add',
'*TimSort.binarySort',
'2mergeAt',
'7Collapse',
'7ForceCollapse',
'7Hi',
'7Lo',
'2sort',
'*stream/AbstractPipeline.copyInto',
'JWithCancel',
'Bevaluate',
'BwrapAndCopyInto',
'1Collectors$$Lambda$7.0x00007ffab904a268.accept',
'1DistinctOps$1$2.accept',
'Aend',
'1ReduceOps$3ReducingSink.accept',
';ReduceOp.evaluateSequential',
'3ferencePipeline$3$1.accept',
'B.collect',
'CforEachWithCancel',
'1Sink$ChainedReference.end',
'2liceOps$1$1.accept',
'2ortedOps$RefSortingSink.accept',
'Jend',
'!long_disjoint_arraycopy',
' ksys_read',
' new_sync_read',
' oop_arraycopy',
' start_thread',
'!un/nio/ch/ChannelInputStream.read',
'+FileChannelImpl.read',
'/DispatcherImpl.read',
'B0',
'+IOUtil.read',
'6IntoNativeBuffer',
'+Util.getTemporaryDirectBuffer',
')s/StreamDecoder.implRead',
'9read',
'=Bytes',
'+UTF_8$Decoder.decodeArrayLoop',
'?Loop',
' thread_native_entry',
' vfs_read',
'!oid ContiguousSpace::oop_since_save_marks_iterate<DefNewScanClosure>',
'%OopOopIterateDispatch<DefNewScanClosure>::Table::oop_oop_iterate<InstanceKlass, narrowOop>',
'fObjArrayKlass, narrowOop>',
'AYoungerGenClosure>::Table::oop_oop_iterate<InstanceKlass, narrowOop>'
];
unpack(cpool);
n(3,584)
f(635,1,1,178)
u(1323)
u(1428)
u(516)
u(188,70)
u(76)
f(84,7,2,68)
f(12,8,2,63)
u(60)
u(52,36)
f(204,11,3,2)
n(284,7)
u(292)
f(300,13,1,2)
n(308,4)
f(316,11,4,24)
f(324,12,6,2)
n(332,9)
f(348,13,5,4)
f(356,12,4,2)
n(364)
u(340)
f(484,12,2,3)
u(476)
f(68,10,3,23)
f(412,11,2,16)
f(380,12,1,15)
f(372,13,1,3)
n(388,2)
n(396)
n(404)
n(428,5)
f(436,14,1,2)
n(444)
u(420)
f(452,11,2,3)
u(468)
f(460,13,1,2)
f(492,11,2)
u(452)
u(468)
f(276,10,2,4)
u(236)
u(244)
u(260)
u(268)
u(252)
f(500,16,1,3)
f(500,17,1,2)
u(276)
u(236)
u(244)
u(260)
u(268)
u(252)
f(92,8,2,3)
u(20)
u(36)
u(28)
f(44,12,1,2)
f(540,5,2,108)
u(532)
u(524)
u(556)
u(548)
u(148)
u(140)
u(132)
u(116)
u(108)
f(1444,15,12,50)
f(1452,16,1,22)
f(124,17,2,20)
f(156,18,10,8)
f(228,19,2,2)
n(508)
n(605)
u(773)
u(757)
u(789)
u(573)
u(741)
u(613)
u(629)
u(621)
f(579,18,2)
f(1460,16,2,27)
f(124,17,11,16)
f(156,18,13,3)
u(605)
u(773)
u(757)
u(789)
u(573)
u(741)
u(613)
u(629)
u(621)
f(1468,15,3,46)
f(124,16,19,27)
f(100,17,10,4)
n(156,13)
f(228,18,1,2)
n(605,10)
u(773)
u(757)
u(789)
u(573)
u(741)
u(613)
u(629)
f(621,26,1,9)
f(713,1,9,405)
u(697)
u(705,229)
f(1241,4,1,228)
u(1177)
u(1225)
u(1185)
u(1161)
u(1169)
u(1249,107)
f(977,11,2,105)
f(1233,12,2,103,2,0,0)
u(657,13)
f(674,14,4,9,8,0,0)
f(866,15,1,3)
u(866)
f(906,15,3,5,4,0,0)
u(906,3,2,0,0)
u(922)
f(1018,18,1,2)
f(922,16,2)
f(1201,13,2,90,2,0,0)
u(1098,87,57,0,0)
u(1074,87,59,0,0)
u(1057,4)
u(689)
f(890,18,1,3)
f(914,19,1,2)
f(1082,16,2,83,59,1,0)
f(682,17,41,2)
n(1066,17)
u(1050)
f(1089,17,17,23,0,0,2)
f(1265,14,23,3)
u(1273)
u(985)
u(985)
u(993)
u(993)
u(1009)
u(1008)
u(172)
u(164)
u(212)
u(220)
u(587)
u(605)
u(773)
u(757)
u(789)
u(573)
u(741)
u(613)
u(629)
u(621)
f(1257,10,3,121)
u(1209)
u(1257)
u(1281)
f(1001,14,1,117)
u(1025)
u(1153)
u(1105,21,0,1,0)
f(1034,18,13,4)
u(1042)
f(666,20,1,3)
f(1315,18,3,4)
f(1121,17,4,73)
u(1113)
u(1137,16)
f(1034,20,14,2,1,0,0)
u(1042)
f(1145,19,2,57,0,2,1)
f(1033,20,54,3)
u(1042)
u(666)
f(1129,17,3,23)
u(1113)
u(1137)
f(1033,20,18,5,1,0,0)
u(1042)
u(666)
f(1218,14,5,3,1,0,0)
u(1194,3,1,0,0)
u(986,3,1,0,0)
f(985,17,1,2)
u(993)
u(993)
u(1009)
u(1008)
u(172)
u(164)
u(212)
u(220)
f(721,3,2,107)
u(969)
u(969)
f(801,6,4,97)
u(801,97,0,0,1)
f(793,8,77,14)
u(857,14,1,0,0)
u(1393,14,1,0,0)
f(1385,11,1,13)
u(961,3)
u(1417)
u(1409)
u(938)
u(882)
f(1401,12,3,10,2,0,0)
u(1329,10,3,0,0)
u(1329,10,3,0,0)
u(1329,10,3,0,0)
u(1337,10,3,1,0)
f(1361,17,2,8,2,0,0)
u(1361,8,2,0,0)
u(1369,6)
u(1345)
u(1353)
u(195)
u(563)
u(765)
u(749)
f(1301,26,1,5)
u(1437)
f(1309,28,1,4)
u(781)
f(645,30,1,3)
u(653)
f(1378,19,3,2)
u(954)
u(1291)
f(874,8,2,6,5,0,0)
u(874,6,5,0,0)
u(930,6,5,0,0)
f(985,6,6)
u(985)
u(993)
u(993)
u(1009)
u(1008,6,0,1,4)
f(172,12,2,4)
u(164)
u(212)
u(220)
u(587)
f(605,17,1,3)
u(773)
u(757)
u(789)
u(573)
u(741)
u(613)
u(629)
u(621)
f(729,3,3,69,0,2,1)
f(817,4,21,2)
u(1009)
u(945)
u(179)
u(595)
f(842,4,2,8,5,0,0)
f(826,5,1,7,5,0,0)
f(809,6,5,2)
u(1008,2,0,0,1)
f(849,4,2,38)
u(849,38,0,0,2)
f(834,6,12,23,21,0,0)
f(826,7,6,17,15,1,0)
f(898,6,17,3)
search();
</script></body></html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

BIN
.assets/images/heatmap.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

BIN
.assets/images/heatmap1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

BIN
.assets/images/heatmap2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

BIN
.assets/images/heatmap3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

BIN
.assets/images/heatmap4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 198 KiB

BIN
.assets/images/heatmap5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 236 KiB

BIN
.assets/images/heatmap6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 271 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

132
.clang-tidy Normal file
View File

@@ -0,0 +1,132 @@
Checks: >
-*,
bugprone-assert-side-effect,
bugprone-bool-pointer-implicit-conversion,
bugprone-chained-comparison,
bugprone-copy-constructor-init,
bugprone-incorrect-roundings,
bugprone-infinite-loop,
bugprone-integer-division,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-arithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-non-zero-enum-to-bool-conversion,
bugprone-pointer-arithmetic-on-polymorphic-object,
bugprone-posix-return,
bugprone-redundant-branch-condition,
bugprone-return-const-ref-from-parameter,
bugprone-sizeof-container,
bugprone-standalone-empty,
bugprone-string-literal-with-embedded-nul,
bugprone-string-integer-assignment,
bugprone-suspicious-include,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-realloc-usage,
bugprone-suspicious-semicolon,
bugprone-suspicious-string-compare,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cppcoreguidelines-misleading-capture-default-by-value,
cppcoreguidelines-pro-type-const-cast,
cppcoreguidelines-slicing,
cert-oop58-cpp,
cert-flp30-c,
misc-confusable-identifiers,
misc-definitions-in-headers,
misc-header-include-cycle,
misc-misplaced-const,
misc-non-copyable-objects,
misc-redundant-expression,
misc-static-assert,
misc-unconventional-assign-operator,
misc-unused-alias-decls,
performance-avoid-endl,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-destructor,
performance-noexcept-move-constructor,
performance-noexcept-swap,
performance-trivially-destructible,
performance-type-promotion-in-math-fn,
performance-unnecessary-copy-initialization,
performance-unnecessary-value-param,
readability-avoid-return-with-void-value,
readability-avoid-unconditional-preprocessor-if,
readability-const-return-type,
readability-container-contains,
readability-container-data-pointer,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include,
readability-function-size,
readability-identifier-naming,
readability-misleading-indentation,
readability-misplaced-array-index,
readability-named-parameter,
readability-operators-representation,
readability-qualified-auto,
readability-redundant-access-specifiers,
readability-redundant-casting,
readability-redundant-control-flow,
readability-redundant-declaration,
readability-redundant-function-ptr-dereference,
readability-redundant-preprocessor,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-reference-to-constructed-temporary,
readability-simplify-subscript-expr,
readability-static-accessed-through-instance,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
readability-use-anyofallof,
# TODO: Consider these
# bugprone-switch-missing-default-case
# bugprone-multi-level-implicit-pointer-conversion
# bugprone-branch-clone
# cert-err33-c
# cppcoreguidelines-narrowing-conversions
# cppcoreguidelines-init-variables
# cppcoreguidelines-explicit-virtual-functions
# cppcoreguidelines-special-member-functions
# llvm-include-order
# misc-const-correctness
# modernize-*
# performance-enum-size
# readability-function-cognitive-complexity
# readability-else-after-return
# readability-convert-member-functions-to-static
# readability-math-missing-parentheses
# readability-non-const-parameter
# readability-redundant-member-init
# readability-simplify-boolean-expr
# misc-include-cleaner
# google-explicit-constructor
# cppcoreguidelines-virtual-class-destructor
# readability-make-member-function-const
HeaderFilterRegex: ".*"
CheckOptions:
- key: readability-identifier-naming.LocalVariableCase
value: lower_case
- key: readability-identifier-naming.LocalVariableIgnoredRegexp
value: '(KB|Thread|setDaemon|klassOop|nVMs|loadLibrary|getTicksFrequency|counterTime|System|M|R|s_)'
- key: readability-identifier-naming.PrivateMemberPrefix
value: _
- key: readability-identifier-naming.ConstexprVariableCase
value: UPPER_CASE

51
.github/ISSUE_TEMPLATE/bug-report.yml vendored Normal file
View File

@@ -0,0 +1,51 @@
---
name: "🐛 Bug Report"
description: Report a bug
title: "(short issue description)"
assignees: []
body:
- type: textarea
id: description
attributes:
label: Describe the bug
description: What is the problem? A clear and concise description of the bug.
validations:
required: true
- type: textarea
id: behavior
attributes:
label: Expected vs. actual behavior
description: |
What did you expect to happen? What happened instead?
validations:
required: false
- type: textarea
id: reproduction
attributes:
label: Reproduction Steps
description: |
Step-by-step instructions on how to reproduce the issue. Attach a code sample if available.
validations:
required: false
- type: textarea
id: context
attributes:
label: Additional Information/Context
description: |
Anything else that might be relevant for troubleshooting this bug: profiles, screenshots, etc.
validations:
required: false
- type: input
id: version
attributes:
label: Async-profiler version
validations:
required: true
- type: textarea
id: environment
attributes:
label: Environment details
description: |
OS name and version, JDK version, CPU architecture. Is the application running in a container?
validations:
required: false

6
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,6 @@
---
blank_issues_enabled: false
contact_links:
- name: 💬 General Question
url: https://github.com/async-profiler/async-profiler/discussions
about: Please ask and answer questions as a discussion thread

View File

@@ -0,0 +1,14 @@
---
name: "📕 Documentation Issue"
description: Report an issue in the profiler documentation
title: "(short issue description)"
labels: [documentation]
assignees: []
body:
- type: textarea
id: description
attributes:
label: Describe the issue
description: A clear and concise description of the issue.
validations:
required: true

View File

@@ -0,0 +1,39 @@
---
name: 🚀 Feature Request
description: Suggest an idea for this project
title: "(short issue description)"
labels: [enhancement]
assignees: []
body:
- type: textarea
id: description
attributes:
label: Describe the feature
description: A clear and concise description of the feature you are proposing.
validations:
required: true
- type: textarea
id: use-case
attributes:
label: Use Case
description: |
Why do you need this feature? For example: "I'm always frustrated when..."
validations:
required: true
- type: textarea
id: solution
attributes:
label: Proposed Solution
description: |
Suggest how to implement the addition or change. Provide references to alternative solutions, if any.
validations:
required: false
- type: checkboxes
id: ack
attributes:
label: Acknowledgements
options:
- label: I may be able to implement this feature request
required: false
- label: This feature might incur a breaking change
required: false

17
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View File

@@ -0,0 +1,17 @@
### Description
### Related issues
### Motivation and context
### How has this been tested?
---
By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].
[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0

83
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,83 @@
name: build-template
on:
workflow_call:
inputs:
platform:
type: string
required: true
runner:
type: string
required: true
container-image:
type: string
required: false
env:
build_java_distribution: corretto
build_java_version: 11
permissions:
contents: read
jobs:
build:
runs-on: ${{ inputs.runner }}
container:
image: ${{ inputs.container-image && format('public.ecr.aws/async-profiler/asprof-builder-{0}', inputs.container-image) || '' }}
name: "build and unit test (${{ inputs.platform }})"
steps:
- name: Run container setup
if: inputs.container-image != ''
run: "[ ! -f /root/setup.sh ] || /root/setup.sh"
- name: Setup Java
uses: actions/setup-java@v4
with:
distribution: ${{ env.build_java_distribution }}
java-version: ${{ env.build_java_version }}
- name: Checkout sources
uses: actions/checkout@v4
- name: Build and unit test
id: build
run: |
set -x
HASH=${GITHUB_SHA:0:7}
case "${{ inputs.platform }}" in
macos*)
brew install gcovr
make COMMIT_TAG=$HASH FAT_BINARY=true release coverage -j
;;
*)
make COMMIT_TAG=$HASH CC=/usr/local/musl/bin/musl-gcc release coverage -j
echo "debug_archive=$(find . -type f -name "async-profiler-*-debug*" -exec basename {} \;)" >> $GITHUB_OUTPUT
;;
esac
echo "archive=$(find . -type f -name "async-profiler-*" -not -name "*-debug*" -exec basename {} \;)" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Set artifact name
id: set_artifact_name
run: echo "artifact_name=async-profiler-${{ inputs.platform }}-${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Upload binaries
uses: actions/upload-artifact@v4
with:
name: ${{ steps.set_artifact_name.outputs.artifact_name }}
path: ${{ steps.build.outputs.archive }}
if-no-files-found: error
- name: Upload debug info
uses: actions/upload-artifact@v4
if: inputs.platform != 'macos'
with:
name: ${{ steps.set_artifact_name.outputs.artifact_name }}-debug
path: ${{ steps.build.outputs.debug_archive }}
if-no-files-found: error
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: test-coverage-${{ inputs.platform }}
path: build/test/coverage/
if-no-files-found: error

48
.github/workflows/clang-tidy-review.yml vendored Normal file
View File

@@ -0,0 +1,48 @@
name: clang-tidy-review
on:
workflow_run:
workflows:
- code-check
types:
- completed
jobs:
clang-tidy-results:
if: ${{ github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' }}
runs-on: ubuntu-latest
container:
image: "public.ecr.aws/async-profiler/asprof-code-check:latest"
permissions:
pull-requests: write
contents: write
actions: read
steps:
- name: Download code-check artifacts
uses: actions/download-artifact@v4
with:
run-id: ${{ github.event.workflow_run.id }}
github-token: ${{ secrets.GITHUB_TOKEN }}
name: code-check-artifacts
path: /tmp/code-check-artifacts/
- name: Read PR information
id: pr_info
run: |
cd /tmp/code-check-artifacts
echo "pr_id=$(cat pr-id.txt)" >> "$GITHUB_OUTPUT"
echo "pr_head_repo=$(cat pr-head-repo.txt)" >> "$GITHUB_OUTPUT"
echo "pr_head_sha=$(cat pr-head-sha.txt)" >> "$GITHUB_OUTPUT"
- uses: actions/checkout@v4
with:
repository: ${{ steps.pr_info.outputs.pr_head_repo }}
ref: ${{ steps.pr_info.outputs.pr_head_sha }}
persist-credentials: false
- name: Run clang-tidy-pr-comments action
uses: platisd/clang-tidy-pr-comments@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
clang_tidy_fixes: /tmp/code-check-artifacts/clang-tidy-fixes.yml
pull_request_id: ${{ steps.pr_info.outputs.pr_id }}
python_path: python
auto_resolve_conversations: true
suggestions_per_comment: 100

49
.github/workflows/code-check.yml vendored Normal file
View File

@@ -0,0 +1,49 @@
name: code-check
on:
- pull_request
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
cpp-lint:
runs-on: ubuntu-latest
container:
image: "public.ecr.aws/async-profiler/asprof-code-check:latest"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 0
- name: Mark repo as safe for Git
run: git config --global --add safe.directory $GITHUB_WORKSPACE
- name: Fetch base branch
run: |
git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}"
git fetch --no-tags --no-recurse-submodules upstream "${{ github.event.pull_request.base.ref }}"
- name: Create artifacts directory
run: |
mkdir code-check-artifacts/
echo "${{ github.event.number }}" > code-check-artifacts/pull-request-id.txt
- name: Run clang-tidy
run: |
set -o pipefail
make cpp-lint-diff \
DIFF_BASE="$(git merge-base HEAD "upstream/${{ github.event.pull_request.base.ref }}")" \
CLANG_TIDY_ARGS_EXTRA="-export-fixes code-check-artifacts/clang-tidy-fixes.yml"
shell: bash
- name: Save PR information
run: |
echo "${{ github.event.number }}" > code-check-artifacts/pr-id.txt
echo "${{ github.event.pull_request.head.repo.full_name }}" > code-check-artifacts/pr-head-repo.txt
echo "${{ github.event.pull_request.head.sha }}" > code-check-artifacts/pr-head-sha.txt
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: code-check-artifacts
path: code-check-artifacts/

119
.github/workflows/integ.yml vendored Normal file
View File

@@ -0,0 +1,119 @@
name: integration-test-template
on:
workflow_call:
inputs:
test-platform:
type: string
required: true
platform:
type: string
required: true
architecture:
type: string
required: false
java-version:
type: string
required: true
java-distribution:
type: string
required: false
default: "corretto"
runner:
type: string
required: true
container-image:
type: string
required: false
container-volumes:
type: string
required: false
use-builtin-jdk:
type: boolean
required: false
default: false
retry-count:
type: number
required: false
default: 0
permissions:
contents: read
jobs:
integration-test:
runs-on: ${{ inputs.runner }}
container:
image: ${{ inputs.container-image && format('public.ecr.aws/async-profiler/asprof-builder-{0}', inputs.container-image) || '' }}
options: --privileged
volumes: ${{ fromJSON(inputs.container-volumes || '[]') }}
name: "${{ inputs.test-platform }}, ${{ inputs.java-distribution }} ${{ inputs.java-version }}"
steps:
- name: Run container setup
if: inputs.container-image != ''
run: "[ ! -f /root/setup.sh ] || /root/setup.sh"
- name: Setup Java
uses: actions/setup-java@v4
# https://github.com/actions/setup-java/issues/678#issuecomment-2446279753
if: ${{ !inputs.use-builtin-jdk }}
with:
distribution: ${{ inputs.java-distribution }}
java-version: ${{ inputs.java-version }}
architecture: ${{ inputs.architecture }}
- name: Checkout sources
uses: actions/checkout@v4
- name: Set variables
id: set_variables
run: |
echo "short_sha=${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
echo "artifact_name=async-profiler-${{ inputs.platform }}-${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Download async-profiler release artifact
uses: actions/download-artifact@v4
with:
name: ${{ steps.set_variables.outputs.artifact_name }}
path: async_profiler_release
- name: Download async-profiler JAR artifacts
uses: actions/download-artifact@v4
with:
name: async-profiler-jars
path: jar_artifacts
- name: Extract async-profiler artifact
id: extract_artifact
run: |
release_archive=$(basename $(find async_profiler_release -type f -iname "async-profiler-*" ))
case "${{ inputs.runner }}" in
macos*)
unzip async_profiler_release/$release_archive
;;
*)
tar xvf async_profiler_release/$release_archive
;;
esac
echo "jars_directory=jar_artifacts" >> $GITHUB_OUTPUT
echo "release_directory=$(basename $(find . -type d -iname "async-profiler-*" ))" >> $GITHUB_OUTPUT
- name: Download Protobuf Java runtime
run: |
mkdir -p test/deps
cd test/deps
curl -L -O "https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/$PB_JAVA_VERSION/protobuf-java-$PB_JAVA_VERSION.jar"
env:
PB_JAVA_VERSION: "4.31.1"
- name: Run integration tests
run: |
mkdir -p build/jar
cp ${{ steps.extract_artifact.outputs.jars_directory }}/* build/jar
make build/test.jar
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/bin build
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/lib build
make test-java TEST_THREADS=2 RETRY_COUNT=${{ inputs.retry-count }} -j
- name: Upload integration test logs
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-logs-${{ inputs.test-platform }}-${{ inputs.java-version }}-${{ steps.set_variables.outputs.short_sha }}
path: |
build/test/logs/
hs_err*.log

59
.github/workflows/linters.yml vendored Normal file
View File

@@ -0,0 +1,59 @@
name: lint
on:
- push
- pull_request
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
license-header:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Check license headers
uses: apache/skywalking-eyes/header@v0.6.0
markdown:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install prettier
run: |
npm install -g prettier@3.4.2
make check-md
eof-newline:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: EOF newline check
env:
offenders_path: /tmp/eof_newline_offenders.txt
run: |
find . -path './.git' -prune -o -exec file --mime-type {} + | grep 'text/' | awk -F: '{print $1}' | while read -r file; do
# Read last byte and verify it's a newline
if [ -s "$file" ] && [ "$(tail -c1 "$file" | wc -l)" -eq 0 ]; then
echo "$file" >> "$offenders_path"
fi
done
if [ -s "$offenders_path" ]; then
cat "$offenders_path"
exit 1
fi
trailing-spaces:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Trailing spaces check
env:
offenders_path: /tmp/trailing_space_offenders.txt
run: |
grep -rIlE --exclude-dir=.git '[[:blank:]]+$' . > "$offenders_path" || true
if [ -s "$offenders_path" ]; then
cat "$offenders_path"
exit 1
fi

View File

@@ -0,0 +1,209 @@
name: CI
on: # We are very liberal in terms of triggering builds. This should be revisited if we start seeing a lot of queueing
- push
- pull_request
- workflow_dispatch
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
build-jars:
runs-on: ubuntu-latest
name: build / jars
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Build JARs
run: make jar
- name: Upload JARs
uses: actions/upload-artifact@v4
with:
name: async-profiler-jars
path: build/jar/*
if-no-files-found: error
build-linux-arm64:
name: build / linux-arm64
uses: ./.github/workflows/build.yml
with:
platform: linux-arm64
runner: ubuntu-24.04-arm
container-image: "arm:latest"
build-linux-x64:
name: build / linux-x64
uses: ./.github/workflows/build.yml
with:
platform: linux-x64
runner: ubuntu-latest
container-image: x86:latest
build-macos:
name: build / macos
uses: ./.github/workflows/build.yml
with:
platform: macos
runner: macos-15
integ-linux-x64:
name: integ / linux-x64
needs: [build-linux-x64, build-jars]
strategy:
fail-fast: false
matrix:
test-platform: [linux-x64]
java-version: [8, 11, 17, 21, 25]
java-distribution: [corretto]
container-image: [x86:latest]
include:
- test-platform: linux-x64-alpine
container-image: alpine:corretto-11
use-builtin-jdk: true
java-distribution: corretto
java-version: 11
- test-platform: linux-x64-AL2
container-image: amazonlinux:2
# GHA provides Node.js by attaching a volume to the container. The container path is
# '/__e/node20', and it's not writable unless we override it via 'container.volumes'.
container-volumes: '["/tmp/node20:/__e/node20"]'
java-version: 11
java-distribution: corretto
- test-platform: linux-x64-AL2023
container-image: amazonlinux:2023
java-version: 11
java-distribution: corretto
- test-platform: linux-x64-alpaquita
container-image: alpaquita:x86_64-liberica-21
use-builtin-jdk: true
java-distribution: liberica
java-version: 21
uses: ./.github/workflows/integ.yml
with:
platform: linux-x64
test-platform: ${{ matrix.test-platform }}
runner: ubuntu-latest
container-image: ${{ matrix.container-image }}
container-volumes: ${{ matrix.container-volumes || '' }}
java-version: ${{ matrix.java-version }}
java-distribution: ${{ matrix.java-distribution }}
use-builtin-jdk: ${{ matrix.use-builtin-jdk || false }}
integ-linux-arm64:
name: integ / linux-arm64
needs: [build-linux-arm64, build-jars]
strategy:
fail-fast: false
matrix:
test-platform: [linux-arm64]
java-version: [8, 11, 17, 21, 25]
java-distribution: [corretto]
container-image: [arm:latest]
uses: ./.github/workflows/integ.yml
with:
platform: linux-arm64
test-platform: ${{ matrix.test-platform }}
runner: ubuntu-24.04-arm
container-image: ${{ matrix.container-image }}
container-volumes: ${{ matrix.container-volumes || '' }}
java-version: ${{ matrix.java-version }}
java-distribution: ${{ matrix.java-distribution }}
integ-macos:
name: integ / macos
needs: [build-macos, build-jars]
strategy:
fail-fast: false
matrix:
include:
- runner: macos-15
test-platform: macos-arm64
java-version: "11"
- runner: macos-15
test-platform: macos-arm64
java-version: "21"
- runner: macos-15-intel
test-platform: macos-x64
java-version: "17"
architecture: x64
retry-count: 1
uses: ./.github/workflows/integ.yml
with:
platform: macos
test-platform: ${{ matrix.test-platform }}
runner: ${{ matrix.runner }}
java-version: ${{ matrix.java-version }}
architecture: ${{ matrix.architecture || '' }}
retry-count: ${{ matrix.retry-count || 0 }}
publish-only-on-push:
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
permissions:
contents: write
name: publish (nightly)
runs-on: ubuntu-latest
needs: [integ-linux-x64, integ-linux-arm64, integ-macos]
steps:
- name: Download async-profiler binaries and jars
uses: actions/download-artifact@v4
with:
pattern: 'async-profiler-*'
merge-multiple: 'true'
- name: Delete previous release and publish new release
uses: actions/github-script@v7
with:
result-encoding: string
script: |
const fs = require('fs').promises;
const commonOptions = {
owner: "async-profiler",
repo: "async-profiler",
};
let previousRelease = undefined;
try {
previousRelease = await github.rest.repos.getReleaseByTag({
...commonOptions,
tag: "nightly",
});
} catch (e) {
console.log("No previous nightly release");
// ignore, there was no previous nightly release
}
if (previousRelease !== undefined) {
// delete previous release and nightly tag
await github.rest.repos.deleteRelease({
...commonOptions,
release_id: previousRelease.data.id,
});
await github.rest.git.deleteRef({...commonOptions, ref: "tags/nightly"});
}
// create draft release
const newReleaseId = (await github.rest.repos.createRelease({
...commonOptions,
tag_name: "nightly",
target_commitish: "${{ github.sha }}",
name: "Nightly builds",
body: "Async-profiler binaries published automatically from the latest sources in `master` upon a successful build.",
prerelease: true,
draft: true,
})).data.id;
// upload binaries and jars to draft release
for (const archiveName of await fs.readdir(process.cwd())) {
await github.rest.repos.uploadReleaseAsset({
...commonOptions,
release_id: newReleaseId,
name: archiveName,
data: await fs.readFile(archiveName),
});
}
// publish release
await github.rest.repos.updateRelease({
...commonOptions,
release_id: newReleaseId,
draft: false,
});

4
.gitignore vendored
View File

@@ -1,6 +1,10 @@
/build/
/nbproject/
/out/
/target/
/.idea/
/test/*.class
.vscode
*.iml
/src/api/**/*.class
.gdb_history

24
.licenserc.yaml Normal file
View File

@@ -0,0 +1,24 @@
header:
- paths:
- 'src/jattach'
license:
content: |
Copyright The jattach authors
SPDX-License-Identifier: Apache-2.0
comment: on-failure
- paths:
- 'src'
- 'test'
paths-ignore:
- 'src/jattach'
- 'src/res'
- '**/MANIFEST.MF'
- 'test/**/*.collapsed'
license:
content: |
Copyright The async-profiler authors
SPDX-License-Identifier: Apache-2.0
comment: on-failure

View File

@@ -1,11 +0,0 @@
language: cpp
dist: precise
sudo: required
before_install:
- sudo apt-get install default-jdk
- sudo bash -c 'echo 1 > /proc/sys/kernel/perf_event_paranoid'
script: make && make test

View File

@@ -1,15 +1,566 @@
# Changelog
## [1.6 Early Access]
## [4.4]
### Features
- #1553: Differential Flame Graphs
### Improvements
- #1705: `memlimit` option to limit size of the call trace storage
- #1706: Extend syntax of `-j` option to truncate deep stacks
- #1720: FlameGraph: Dark mode toggle
- #1672: FlameGraph: Use Ctrl+Click in addition to Alt+Click to remove stacks
- #1684: Unwind ARM64 generated stubs on JDK 26+
- #1676: Make `dwarf` stack walking mode an alias for `vm`
- #1671: An option to select TLAB based AllocTracer engine with JDK 11+
- #1670: Move converter Main class to the one.convert package
- #1660: Provide non-aggregated samples in OTLP converter
- #1701, #1682: Speed-up stack walking
### Breaking changes
- #1673: Permanently remove `check` command
- #1675: Remove unsafe AsyncGetCallTrace recovery tricks along with `safemode` option
- #1677: Remove `cstack=lbr` option
### Bug fixes
- #1727: Allocation profile has wrong units in OTLP format
- #1716: Wall-clock Heatmap does not count samples correctly
- #1715: Fix Zing crash when profiling cpu+wall together
- #1708: Another fix for correct vDSO unwinding on ARM64
- #1707: Workaround for JFR shutdown race
- #1699: Allow negative keys in JFR constant pool
- #1697: Ensure remaining buffer is sufficient for event data in JfrReader
- #1657: Re-enable workaround for a long attach on JDK 8
- #1654: Prefer perf-events engine when record-cpu or target-cpu are selected
- #1585: Scale perf counters in case of multiplexing
- #1528: Add a hard-coded limit on the maximum number of jmethodIDs
- #1203: Fix "Instance field not found" when using `-Xcheck:jni` on JDK 8
- Do not walk past virtual thread continuation barriers
## [4.3] - 2026-01-20
### Features
- #1547: Native lock profiling
- #1566: Filter cpu/wall profiles by latency
- #1568: Expose async-profiler metrics in Prometheus format
- #1628: async-profiler.jar as Java agent; remote control via JMX
### Improvements
- #1140: FlameGraph improvements: legend, hot keys, new toolbar icons
- #1530: Timezone switcher between Local and UTC time in Heatmaps
- #1582: Support `--include`/`--exclude` options for JFR to Heatmap/OTLP/pprof conversion
- #1624: Compatibility with OTLP v1.9.0
- #1629: Harden crash protection in StackWalker
### Breaking changes
- #1277: New `timeSpan` field in WallClockSample events
- #1518: Deprecate `check` command
- #1590: Support compilation on modern JDKs. Drop JDK 7 support
### Bug fixes
- #1599: Workaround for the kernel PERF_EVENT_IOC_REFRESH bug
- #1596: Do not block any signals during execution of a custom crash handler
- #1584: JfrReader loops on corrupted recordings
- #1555: Parse FlameGraph title from HTML input
- #1621: `loop` and `timeout` options do not work together
- #1641: Unwind vDSO correctly on Linux-ARM64
- #1648: Fix stop sequence in Profiler::start
- #1575: Fix CodeCache memory leak in lock profiling while looping
- #1558: Fix record-cpu bug when kernel stacks are not available
- #1651: Do not record CPU frame for non-perf samples
- #1614, #1615, #1617, #1623: Fix races related to VM termination
## [4.2.1] - 2025-11-22
### Bug fixes
- #1599: Workaround for the kernel PERF_EVENT_IOC_REFRESH bug
- #1596: Do not block any signals during execution of a custom crash handler
## [4.2] - 2025-10-20
### Features
- Java Method Tracing and Latency Profiling
* #1421: Latency profiling
* #1435: Allow wildcards in Instrument profiling engine
* #1499: `--trace` option with per-method latency threshold
- System-wide process sampling on Linux
* #1411: `--proc` option to record `profiler.ProcessSample` events
- VMStructs stack walker by default
* #1539: Use VMStructs stack walking mode by default
* #1537: Support `comptask` and `vtable` features
* #1517: Use JavaFrameAnchor to find top Java frame
* #1449: Special handling of prologue and epilogue of compiled methods
### Improvements
- #1475: Add `CPUTimeSample` event support to jfrconv
- #1414: Per-thread flamegraph option in JFR heatmap converter
- #1526: Expose JfrReader dictionary that maps osThreadId to javaThreadId
- #1448: Thread name in OpenTelemetry output
- #1413: Add `time_nanos` and `duration_nanos` to OTLP profiles
- #1450: Unwind dylib stubs as empty frames on macOS
- #1416: Add synthetic symbols for Mach-O stubs/trampolines
- Allow cross-compilation for 32-bit platforms
### Bug fixes
- #1515: Fix UnsatisfiedLinkError when tmpdir is set to a relative path
- #1500: Detect if `calloc` calls `malloc` for nativemem profiling
- #1427: Re-implement SafeAccess crash protection
- #1417: Two wall-clock profilers interfere with each other
### Project Infrastructure
- #1527: GHA: replace macos-13 with macos-15-intel
- #1510: Add option to retry tests
- #1508: Add more GHA jobs to cover JDK versions on ARM
- #1502: Fix job dependencies between integration tests and builds
- #1466: Add Liberica JDK on Alpaquita Linux to the CI
- Made integration tests more stable overall
## [4.1] - 2025-07-21
### Features
- Experimental support for the OpenTelemetry profiling signal
* #1188: OTLP output format and `dumpOtlp` Java API
* #1336: JFR to OTLP converter
- JDK 25 support
* #1222: Update VMStructs for JDK 25
- Productize native memory profiling
* #1193: Full `nativemem` support on macOS
* #1254: Fixed Nativemem tests on Alpine
* #1269: Native memory profiling now works with `jemalloc`
* #1323: `nativemem` shows allocations inside async-profiler itself
### Improvements
- #1174: Detect JVM in non-Java application and attach to it
- #1223: Native API to add custom events in JFR recording
- #1259: `--all` option to collect all possible events simultaneously
- #1286: Record which CPU a sample was taken on
- #1299: Skip last 10% allocations for leak detection
- #1300: Allow profiling kprobes/uprobes with `--fdtransfer`
- #1366: Rewrite `jfrconv` executable to shell
- #1400: Unwind checksum and digest intrinsics on ARM64
- #1357, #1389: VMStructs-based stack unwinding for `alloc` and `nativemem` profiling
### Bug fixes
- #1251: `--ttsp` option does not work on Alpine
- #1264: Guard hook installation with dlopen/dlclose
- #1319: SIGSEGV in PerfEvents::walk
- #1350: Disable JFR OldObjectSample event in jfrsync mode
- #1358: Do not dereference jmethodIDs on JDK 26
- #1374: Correctly check if profiler is preloaded
- #1380: Workaround clang type promotion bug
- #1387: JFR writer crashes when using cstack=vmx
- #1393: Improve stack walking termination logic: no endless `unknown` frames
- Stack unwinding fixes for ARM64
### Project Infrastructure
- #1129: Command-line option to filter tests
- #1262: Include `asprof.h` in async-profiler release package
- #1271: Release additional binaries with debug symbols
- #1274: Add Corretto 8 to the test matrix
- #1246, #1226: Run tests on Amazon Linux and Alpine Linux
- #1360: Auto-generated clang-tidy review comments
- #1373: Save all generated test logs for debug purposes
- Fixed flaky tests (#1282, #1307, #1376)
## [4.0] - 2025-04-08
### Features
- #895, #905: `jfrconv` binary and numerous converter enhancements
- #944: Interactive Heatmap
- #1064: Native memory leak profiler
- #1002: An option to display instruction addresses
- #1007: Optimize wall clock profiling
- #1073: Productize VMStructs-based stack walker: `--cstack vm/vmx`
- #1169: C API for accessing thread-local profiling context
### Improvements
- #923: Support JDK 23+
- #952: Solve musl and glibc compatibility issues; link `libstdc++` statically
- #955: `--libpath` option to specify path to `libasyncProfiler.so` in a container
- #1018: `--grain` converter option to coarsen flame graphs
- #1046: `--nostop` option to continue profiling outside `--begin`/`--end` window
- #1178: `--inverted` option to flip flame graphs vertically
- #1009: Allows collecting allocation and live object traces at the same time
- #925: An option to accumulate JFR events in memory instead of flushing to a file
- #929: Load symbols from debuginfod cache
- #982: Sample contended locks by overflowing interval bucket
- #993: Filter native frames in allocation profile
- #896: FlameGraph: `Alt+Click` to remove stacks
- #1097: FlameGraph: `N`/`Shift+N` to navigate through search results
- #1182: Retain by-thread grouping when reversing FlameGraph
- #1167: Log when no samples are collected
- #1044: Fall back to `ctimer` for CPU profiling when perf_events are unavailable
- #1068: Count missed samples when estimating total CPU time in `ctimer` mode
- #1142: Use counter-timer register for timestamps on ARM64
- #1123: Support `clock=tsc` without a JVM
- #1070: Demangle Rust v0 symbols
- #1007: Use `ExecutionSample` event for CPU profiling and `WallClockSample` for Wall clock profiling
- #1011: Obtain `can_generate_sampled_object_alloc_events` JVMTI capability only when needed
- #1013: Intercept java.util.concurrent locks more efficiently
- #759: Discover available profiling signal automatically
- #884: Record event timestamps early
- #885: Print error message if JVM fails to load libasyncProfiler
- #892: Resolve tracepoint id in `asprof`
- Suppress dynamic attach warning on JDK 21+
### Bug fixes
- #1143: Crash on macOS when using thread filter
- #1125: Fixed parsing concurrently loaded libraries
- #1095: jfr print fails when a recording has empty pools
- #1084: Fixed Logging related races
- #1074: Parse both .rela.dyn and .rela.plt sections
- #1003: Support both tracefs and debugfs for kernel tracepoints
- #986: Profiling output respects loglevel
- #981: Avoid JVM crash by deleting JNI refs after `GetMethodDeclaringClass`
- #934: Fix crash on Zing in a native thread
- #843: Fix race between parsing and concurrent unloading of shared libraries
- #1147, #1151: Deadlocks with jemalloc and tcmalloc profilers
- Stack walking fixes for ARM64
- Converter fixes for `jfrsync` profiles
- Fixed parsing non-PIC executables and shared objects with non-standard section layout
- Fixed recursion in `pthread_create` when using native profiling API
- Fixed crashes on Alpine when profiling native apps
- Fixed warnings with `-Xcheck:jni`
- Fixed "Unsupported JVM" on OpenJ9 JDK 21
- Fixed DefineClass crash on OpenJ9
- JfrReader should handle custom events properly
- Handle truncated JFRs
### Project Infrastructure
- Restructure and update documentation
- Implement test framework; add new integration tests
- Unit test framework for C++ code
- Run CI on all supported platforms
- Test multiple JDK versions in CI
- Add GHA to validate license headers
- Add Markdown checker and formatter
- Add Issue and Pull Request templates
- Add Contributing Guidelines and Code of Conduct
- Run static analyzer and fix found issues (#1034, #1039, #1049, #1051, #1098)
- Provide Dockerfile for building async-profiler release packages
- Publish nightly builds automatically
## [3.0] - 2024-01-20
### Features
- #724: Binary launcher `asprof`
- #751: Profile non-Java processes
- #795: AsyncGetCallTrace replacement
- #719: Classify execution samples into categories in JFR converter
- #855: `ctimer` mode for accurate profiling without perf_events
- #740: Profile CPU + Wall clock together
- #736: Show targets of vtable/itable calls
- #777: Show JIT compilation task
- #644: RISC-V port
- #770: LoongArch64 port
### Improvements
- #733: Make the same `libasyncProfiler` work with both glibc and musl
- #734: Support raw PMU event descriptors
- #759: Configure alternative profiling signal
- #761: Parse dynamic linking structures
- #723: `--clock` option to select JFR timestamp source
- #750: `--jfrsync` may specify a list of JFR events
- #849: Parse concatenated multi-chunk JFRs
- #833: Time-to-safepoint JFR event
- #832: Normalize names of hidden classes / lambdas
- #864: Reduce size of HTML Flame Graph
- #783: Shutdown asprof gracefully on SIGTERM
- Better demangling of C++ and Rust symbols
- DWARF unwinding for ARM64
- `JfrReader` can parse in-memory buffer
- Support custom events in `JfrReader`
- An option to read JFR file by chunks
- Record `GCHeapSummary` events in JFR
### Bug fixes
- Workaround macOS crashes in SafeFetch
- Fixed attach to OpenJ9 on macOS
- Support `UseCompressedObjectHeaders` aka Lilliput
- Fixed allocation profiling on JDK 20.0.x
- Fixed context-switches profiling
- Prefer ObjectSampler to TLAB hooks for allocation profiling
- Improved accuracy of ObjectSampler in `--total` mode
- Make Flame Graph status line and search results always visible
- `loop` and `timeout` options did not work in some modes
- Restart interrupted poll/epoll_wait syscalls
- Fixed stack unwinding issues on ARM64
- Workaround for stale jmethodIDs
- Calculate ELF base address correctly
- Do not dump redundant threads in a JFR chunk
- `check` action prints result to a file
- Annotate JFR unit types with `@ContentType`
## [2.9] - 2022-11-27
### Features
- Java Heap leak profiler
- `meminfo` command to print profiler's memory usage
- Profiler API with embedded agent as a Maven artifact
### Improvements
- `--include`/`--exclude` options in the FlameGraph converter
- `--simple` and `--dot` options in jfr2flame converter
- An option for aggressive recovery of `[unknown_Java]` stack traces
- Do not truncate signatures in collapsed format
- Display inlined frames under a runtime stub
### Bug fixes
- Profiler did not work with Homebrew JDK
- Fixed allocation profiling on Zing
- Various `jfrsync` fixes
- Symbol parsing fixes
- Attaching to a container on Linux 3.x could fail
## [2.8.3] - 2022-07-16
### Improvements
- Support virtualized ARM64 macOS
- A switch to generate auxiliary events by async-profiler or FlightRecorder in jfrsync mode
### Bug fixes
- Could not recreate perf_events after the first failure
- Handle different versions of Zing properly
- Do not call System.loadLibrary, when libasyncProfiler is preloaded
## [2.8.2] - 2022-07-13
### Bug fixes
- The same .so works with glibc and musl
- dlopen hook did not work on Arch Linux
- Fixed JDK 7 crash
- Fixed CPU profiling on Zing
### Changes
- Mark interpreted frames with `_[0]` in collapsed output
- Double click selects a method name on a flame graph
## [2.8.1] - 2022-06-10
### Improvements
- JFR to pprof converter (contributed by @NeQuissimus)
- JFR converter improvements: time range, collapsed output, pattern highlighting
- `%n` pattern in file names; limit number of output files
- `--lib` to customize profiler library path in a container
- `profiler.sh list` command now works without PID
### Bug fixes
- Fixed crashes related to continuous profiling
- Fixed Alpine/musl compatibility issues
- Fixed incomplete collapsed output due to weird locale settings
- Workaround for JDK-8185348
## [2.8] - 2022-05-09
### Features
- Mark top methods as interpreted, compiled (C1/C2), or inlined
- JVM TI based allocation profiling for JDK 11+
- Embedded HTTP management server
### Improvements
- Re-implemented stack recovery for better reliability
- Add `loglevel` argument
- Do not mmap perf page in `--all-user` mode
- Distinguish runnable/sleeping threads in OpenJ9 wall-clock profiler
- `--cpu` converter option to extract CPU profile from the wall-clock output
## [2.7] - 2022-02-14
### Features
- Experimental support for OpenJ9 VM
- DWARF stack unwinding
### Improvements
- Better handling of VM threads (fixed missing JIT threads)
- More reliable recovery from `not_walkable` AGCT failures
- Do not accept unknown agent arguments
## [2.6] - 2022-01-09
### Features
- Continuous profiling; `loop` and `timeout` options
### Improvements
- Reliability improvements: avoid certain crashes and deadlocks
- Smaller and faster agent library
- Minor `jfr` and `jfrsync` enhancements (see the commit log)
## [2.5.1] - 2021-12-05
### Bug fixes
- Prevent early unloading of libasyncProfiler.so
- Read kernel symbols only for perf_events
- Escape backslashes in flame graphs
- Avoid duplicate categories in `jfrsync` mode
- Fixed stack overflow in RedefineClasses
- Fixed deadlock when flushing JFR
### Improvements
- Support OpenJDK C++ Interpreter (aka Zero)
- Allow reading incomplete JFR recordings
## [2.5] - 2021-10-01
### Features
- macOS/ARM64 (aka Apple M1) port
- PPC64LE port (contributed by @ghaug)
- Profile low-privileged processes with perf_events (contributed by @Jongy)
- Raw PMU events; kprobes & uprobes
- Dump results in the middle of profiling session
- Chunked JFR; support JFR files larger than 2 GB
- Integrate async-profiler events with JDK Flight Recordings
### Improvements
- Use RDTSC for JFR timestamps when possible
- Show line numbers and bci in Flame Graphs
- jfr2flame can produce Allocation and Lock flame graphs
- Flame Graph title depends on the event and `--total`
- Include profiler logs and native library list in JFR output
- Lock profiling no longer requires JVM symbols
- Better container support
- Native function profiler can count the specified argument
- An option to group threads by scheduling policy
- An option to prepend library name to native symbols
### Notes
- macOS build is provided as a fat binary that works both on x86-64 and ARM64
- 32-bit binaries are no longer shipped. It is still possible to build them from sources
- Dropped JDK 6 support (may still work though)
## [2.0] - 2021-03-14
### Features
- Profile multiple events together (cpu + alloc + lock)
- HTML 5 Flame Graphs: faster rendering, smaller size
- JFR v2 output format, compatible with FlightRecorder API
- JFR to Flame Graph converter
- Automatically turn profiling on/off at `--begin`/`--end` functions
- Time-to-safepoint profiling: `--ttsp`
### Improvements
- Unlimited frame buffer. Removed `-b` option and 64K stack traces limit
- Additional JFR events: OS, CPU, and JVM information; CPU load
- Record bytecode indices / line numbers
- Native stack traces for Java events
- Improved CLI experience
- Better error handling; an option to log warnings/errors to a dedicated stream
- Reduced the amount of unknown stack traces
### Changes
- Removed non-ASL code. No more CDDL license
## [1.8.4] - 2021-02-24
### Improvements
- Smaller and faster agent library
### Bug fixes
- Fixed JDK 7 crash during wall-clock profiling
## [1.8.3] - 2021-01-06
### Improvements
- libasyncProfiler.dylib symlink on macOS
### Bug fixes
- Fixed possible deadlock on non-HotSpot JVMs
- Gracefully stop profiler when terminating JVM
- Fixed GetStackTrace problem after RedefineClasses
## [1.8.2] - 2020-11-02
### Improvements
- AArch64 build is now provided out of the box
- Compatibility with JDK 15 and JDK 16
### Bug fixes
- More careful native stack walking in wall-clock mode
- `resume` command is not compatible with JFR format
- Wrong allocation sizes on JDK 8u262
## [1.8.1] - 2020-09-05
### Improvements
- Possibility to specify application name instead of `pid` (contributed by @yuzawa-san)
### Bug fixes
- Fixed long attach time and slow class loading on JDK 8
- `UnsatisfiedLinkError` during Java method profiling
- Avoid reading `/proc/kallsyms` when `--all-user` is specified
## [1.8] - 2020-08-10
### Features
- Converters between different output formats:
- JFR -> nflx (FlameScope)
- Collapsed stacks -> HTML 5 Flame Graph
### Improvements
- `profiler.sh` no longer requires bash (contributed by @cfstras)
- Fixed long attach time and slow class loading on JDK 8
- Fixed deadlocks in wall-clock profiling mode
- Per-thread reverse Flame Graph and Call Tree
- ARM build now works with ARM and THUMB flavors of JDK
### Changes
- Release package is extracted into a separate folder
## [1.7.1] - 2020-05-14
### Features
- LBR call stack support (available since Haswell)
### Improvements
- `--filter` to profile only specified thread IDs in wall-clock mode
- `--safe-mode` to disable selected stack recovery techniques
## [1.7] - 2020-03-17
### Features
- Profile invocations of arbitrary Java methods
- Filter stack traces by the given name pattern
- Java API to filter monitored threads
- `--cstack`/`--no-cstack` option
### Improvements
- Thread names and Java thread IDs in JFR output
- Wall clock profiler distinguishes RUNNABLE vs. SLEEPING threads
- Stable profiling interval in wall clock mode
- C++ function names as events, e.g. `-e VMThread::execute`
- `check` command to test event availability
- Allow shading of AsyncProfiler API
- Enable CPU profiling on WSL
- Enable allocation profiling on Zing
- Reduce the amount of `unknown_Java` samples
## [1.6] - 2019-09-09
### Features
- Pause/resume profiling
- Allocation profiling support for JDK 12, 13 (contributed by @rraptorr)
### Improvements
- Include all AsyncGetCallTrace failures in the profile
- Parse symbols of JNI libraries loaded in runtime
- The agent autodetects output format by the file extension
- Output file name patterns: `%p` and `%t`
- `-g` option to print method signatures
- `-j` can increase the maximum Java stack depth
- Output file name patterns: `%p` and `%t`
- Allocation sampling rate can be adjusted with `-i`
- Parse symbols of JNI libraries loaded in runtime
- JDK 12 allocation profiling support (contributed by @rraptorr)
- Improved reliability on macOS
### Changes
- `-f` file names are now relative to the current shell directory
@@ -39,7 +590,7 @@
### Features
- Interactive Call tree and Backtrace tree in HTML format (contributed by @rpulle)
- Experimental support for Java Flight Recorder (JFR) compatible output
### Improvements
- Added units: `ms`, `us`, `s` and multipliers: `K`, `M`, `G` for interval argument
- API and command-line option `-v` for profiler version
@@ -53,7 +604,7 @@
### Features
- Profiling of native functions, e.g. malloc
### Improvements
- JDK 9, 10, 11 support for heap profiling with accurate stack traces
- `root` can now profile Java processes of any user
@@ -65,7 +616,7 @@
- Produce SVG files out of the box; flamegraph.pl is no longer needed
- Profile ReentrantLock contention
- Java API
### Improvements
- Allocation and Lock profiler now works on JDK 7, too
- Faster dumping of results

4
CODE_OF_CONDUCT.md Normal file
View File

@@ -0,0 +1,4 @@
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.

59
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,59 @@
# Contributing Guidelines
Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.
Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.
## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
## Reporting Bugs/Feature Requests
We welcome you to use the GitHub issue tracker to report bugs or suggest features.
When filing an issue, please check [existing open](https://github.com/async-profiler/async-profiler/issues), or [recently closed](https://github.com/async-profiler/async-profiler/issues?q=is%3Aissue+is%3Aclosed), issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment
## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
1. You are working against the latest source on the *master* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
To send us a pull request, please:
1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/async-profiler/async-profiler/labels/help%20wanted) issues is a great place to start.
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
## Licensing
See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.

352
Makefile
View File

@@ -1,64 +1,338 @@
PROFILER_VERSION=1.6-ea
JATTACH_VERSION=1.5
LIB_PROFILER=libasyncProfiler.so
JATTACH=jattach
PROFILER_JAR=async-profiler.jar
CC=gcc
CFLAGS=-O2
CPP=g++
CPPFLAGS=-O2
INCLUDES=-I$(JAVA_HOME)/include
PROFILER_VERSION ?= 4.4
ifeq ($(COMMIT_TAG),true)
PROFILER_VERSION := $(PROFILER_VERSION)-$(shell git rev-parse --short=8 HEAD)
else ifneq ($(COMMIT_TAG),)
PROFILER_VERSION := $(PROFILER_VERSION)-$(COMMIT_TAG)
endif
TMP_DIR=/tmp
COMMA=,
PACKAGE_NAME=async-profiler-$(PROFILER_VERSION)-$(OS_TAG)-$(ARCH_TAG)
PACKAGE_DIR=$(TMP_DIR)/$(PACKAGE_NAME)
DEBUG_PACKAGE_NAME=$(PACKAGE_NAME)-debug
DEBUG_PACKAGE_DIR=$(PACKAGE_DIR)-debug
ASPROF=bin/asprof
JFRCONV=bin/jfrconv
LIB_PROFILER=lib/libasyncProfiler.$(SOEXT)
LIB_PROFILER_DEBUG=libasyncProfiler.$(SOEXT).debug
ASPROF_HEADER=include/asprof.h
API_JAR=jar/async-profiler.jar
CONVERTER_JAR=jar/jfr-converter.jar
TEST_JAR=test.jar
CC ?= gcc
CXX ?= g++
STRIP ?= strip
OBJCOPY ?= objcopy
ifneq ($(CROSS_COMPILE),)
CC := $(CROSS_COMPILE)gcc
CXX := $(CROSS_COMPILE)g++
AS := $(CROSS_COMPILE)as
LD := $(CROSS_COMPILE)ld
STRIP := $(CROSS_COMPILE)strip
OBJCOPY := $(CROSS_COMPILE)objcopy
endif
CFLAGS_EXTRA ?=
CXXFLAGS_EXTRA ?=
CFLAGS=-O3 -fno-exceptions $(CFLAGS_EXTRA)
CXXFLAGS=-O3 -fno-exceptions -fno-omit-frame-pointer -fvisibility=hidden -std=c++11 $(CXXFLAGS_EXTRA)
CPPFLAGS=
DEFS=-DPROFILER_VERSION=\"$(PROFILER_VERSION)\"
INCLUDES=-I$(JAVA_HOME)/include -Isrc/helper
LIBS=-ldl -lpthread
MERGE=true
GCOV ?= gcov
JAVAC=$(JAVA_HOME)/bin/javac
JAR=$(JAVA_HOME)/bin/jar
JAVA=$(JAVA_HOME)/bin/java
JAVA_TARGET=8
JAVAC_OPTIONS=--release $(JAVA_TARGET) -Xlint:-options
TEST_JAVA ?= $(JAVA_HOME)/bin/java
TEST_LIB_DIR=build/test/lib
TEST_BIN_DIR=build/test/bin
TEST_DEPS_DIR=test/deps
TEST_GEN_DIR=test/gen
LOG_DIR=build/test/logs
LOG_LEVEL=
SKIP=
RETRY_COUNT=0
TEST_THREADS ?= 8
TEST_FLAGS=-DlogDir=$(LOG_DIR) -DlogLevel=$(LOG_LEVEL) -Dskip='$(subst $(COMMA), ,$(SKIP))' -DretryCount=$(RETRY_COUNT) -DthreadCount=$(TEST_THREADS)
# always sort SOURCES so zInit is last.
SOURCES := $(sort $(wildcard src/*.cpp))
HEADERS := $(wildcard src/*.h)
RESOURCES := $(wildcard src/res/*)
JAVA_HELPER_CLASSES := $(wildcard src/helper/one/profiler/*.class)
API_SOURCES := $(wildcard src/api/one/profiler/*.java)
JAR_MANIFEST := src/api/one/profiler/MANIFEST.MF
CONVERTER_SOURCES := $(shell find src/converter -name '*.java')
TEST_SOURCES := $(shell find test -name '*.java' ! -path 'test/stubs/*')
TESTS ?=
CPP_TEST_SOURCES := test/native/testRunner.cpp $(shell find test/native -name '*Test.cpp')
CPP_TEST_HEADER := test/native/testRunner.hpp
CPP_TEST_INCLUDES := -Isrc -Itest/native
TEST_LIB_SOURCES := $(wildcard test/native/libs/*)
TEST_BIN_SOURCES := $(shell find test/test -name "*.c*")
ifeq ($(JAVA_HOME),)
export JAVA_HOME:=$(shell java -cp . JavaHome)
JAVA_HOME:=$(shell java -cp . JavaHome)
endif
OS:=$(shell uname -s)
ifeq ($(OS), Darwin)
CPPFLAGS += -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE
ifeq ($(OS),Darwin)
CXXFLAGS += -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -Wl,-rpath,@executable_path/../lib -Wl,-rpath,@executable_path/../lib/server
INCLUDES += -I$(JAVA_HOME)/include/darwin
RELEASE_TAG:=$(PROFILER_VERSION)-macos-x64
SOEXT=dylib
PACKAGE_EXT=zip
OS_TAG=macos
ifeq ($(FAT_BINARY),true)
FAT_BINARY_FLAGS=-arch x86_64 -arch arm64 -mmacos-version-min=10.15
CFLAGS += $(FAT_BINARY_FLAGS)
CXXFLAGS += $(FAT_BINARY_FLAGS)
PACKAGE_NAME=async-profiler-$(PROFILER_VERSION)-$(OS_TAG)
MERGE=false
endif
else
CXXFLAGS += -U_FORTIFY_SOURCE -Wl,-z,defs -Wl,--exclude-libs,ALL -static-libstdc++ -static-libgcc
CXXFLAGS += -fdata-sections -ffunction-sections -Wl,--gc-sections -ggdb -Wunused-variable -Wno-psabi
ifeq ($(MERGE),true)
CXXFLAGS += -fwhole-program
endif
LIBS += -lrt
INCLUDES += -I$(JAVA_HOME)/include/linux
RELEASE_TAG:=$(PROFILER_VERSION)-linux-x64
SOEXT=so
PACKAGE_EXT=tar.gz
OS_TAG=linux
endif
ifeq ($(ARCH_TAG),)
ARCH:=$(shell uname -m)
ifeq ($(ARCH),x86_64)
ARCH_TAG=x64
else ifeq ($(ARCH),aarch64)
ARCH_TAG=arm64
else ifeq ($(ARCH),arm64)
ARCH_TAG=arm64
else ifeq ($(findstring arm,$(ARCH)),arm)
ARCH_TAG=arm32
else ifeq ($(ARCH),ppc64le)
ARCH_TAG=ppc64le
else ifeq ($(ARCH),riscv64)
ARCH_TAG=riscv64
else ifeq ($(ARCH),loongarch64)
ARCH_TAG=loongarch64
else
ARCH_TAG=x86
endif
endif
.PHONY: all release test clean
STATIC_BINARY=$(findstring musl-gcc,$(CC))
ifneq (,$(STATIC_BINARY))
CFLAGS += -static -fdata-sections -ffunction-sections -Wl,--gc-sections
endif
all: build build/$(LIB_PROFILER) build/$(JATTACH) build/$(PROFILER_JAR)
.PHONY: all jar release build-test test clean coverage clean-coverage build-test-java build-test-cpp test-cpp test-java check-md format-md
release: build async-profiler-$(RELEASE_TAG).tar.gz
all: build/bin build/lib build/$(LIB_PROFILER) build/$(ASPROF) jar build/$(JFRCONV) build/$(ASPROF_HEADER)
async-profiler-$(RELEASE_TAG).tar.gz: build/$(LIB_PROFILER) build/$(JATTACH) \
build/$(PROFILER_JAR) profiler.sh LICENSE *.md
tar cvzf $@ $^
jar: build/jar build/$(API_JAR) build/$(CONVERTER_JAR)
build:
mkdir -p build
release: $(PACKAGE_NAME).$(PACKAGE_EXT)
build/$(LIB_PROFILER): src/*.cpp src/*.h
$(CPP) $(CPPFLAGS) -DPROFILER_VERSION=\"$(PROFILER_VERSION)\" $(INCLUDES) -fPIC -shared -o $@ src/*.cpp $(LIBS)
$(PACKAGE_NAME).tar.gz: $(PACKAGE_DIR)
patchelf --remove-needed ld-linux-x86-64.so.2 --remove-needed ld-linux-aarch64.so.1 $(PACKAGE_DIR)/$(LIB_PROFILER)
tar czf $@ -C $(PACKAGE_DIR)/.. $(PACKAGE_NAME)
rm -r $(PACKAGE_DIR)
build/$(JATTACH): src/jattach/jattach.c
$(CC) $(CFLAGS) -DJATTACH_VERSION=\"$(JATTACH_VERSION)\" -o $@ $^
tar czf $(DEBUG_PACKAGE_NAME).tar.gz -C $(DEBUG_PACKAGE_DIR)/.. $(DEBUG_PACKAGE_NAME)
rm -r $(DEBUG_PACKAGE_DIR)
build/$(PROFILER_JAR): src/java/one/profiler/*.java
mkdir -p build/classes
$(JAVAC) -source 6 -target 6 -d build/classes $^
$(JAR) cvf $@ -C build/classes .
rm -rf build/classes
$(PACKAGE_NAME).zip: $(PACKAGE_DIR)
ifneq ($(GITHUB_ACTIONS), true)
codesign -s "Developer ID" -o runtime --timestamp -v $(PACKAGE_DIR)/$(ASPROF) $(PACKAGE_DIR)/$(JFRCONV) $(PACKAGE_DIR)/$(LIB_PROFILER)
endif
ditto -c -k --keepParent $(PACKAGE_DIR) $@
rm -r $(PACKAGE_DIR)
test: all
test/smoke-test.sh
test/thread-smoke-test.sh
test/alloc-smoke-test.sh
test/load-library-test.sh
echo "All tests passed"
$(PACKAGE_DIR): all LICENSE README.md
rm -rf $@
mkdir -p $(PACKAGE_DIR) $(DEBUG_PACKAGE_DIR)
cp -RP build/bin build/lib build/include LICENSE README.md $(PACKAGE_DIR)/
chmod -R 755 $(PACKAGE_DIR)
chmod 644 $(PACKAGE_DIR)/lib/* $(PACKAGE_DIR)/include/* $(PACKAGE_DIR)/LICENSE $(PACKAGE_DIR)/README.md
ifeq ($(OS_TAG),linux)
$(STRIP) --only-keep-debug build/$(LIB_PROFILER) -o $(DEBUG_PACKAGE_DIR)/$(LIB_PROFILER_DEBUG)
$(STRIP) -g $@/$(LIB_PROFILER)
$(OBJCOPY) --add-gnu-debuglink=$(DEBUG_PACKAGE_DIR)/$(LIB_PROFILER_DEBUG) $@/$(LIB_PROFILER)
chmod 644 $(DEBUG_PACKAGE_DIR)/*
endif
build/%:
mkdir -p $@
build/$(ASPROF): src/main/* src/jattach/* src/fdtransfer.h
$(CC) $(CPPFLAGS) $(CFLAGS) $(DEFS) -o $@ src/main/*.cpp src/jattach/*.c
$(STRIP) $@
build/$(JFRCONV): src/launcher/launcher.sh build/$(CONVERTER_JAR)
sed -e 's/PROFILER_VERSION/$(PROFILER_VERSION)/g' -e 's/BUILD_DATE/$(shell date "+%b %d %Y")/g' src/launcher/launcher.sh > $@
chmod +x $@
cat build/$(CONVERTER_JAR) >> $@
build/$(LIB_PROFILER): $(SOURCES) $(HEADERS) $(RESOURCES) $(JAVA_HELPER_CLASSES)
ifeq ($(MERGE),true)
for f in src/*.cpp; do echo '#include "'$$f'"'; done |\
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) -fPIC -shared -o $@ -xc++ - $(LIBS)
else
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) -fPIC -shared -o $@ $(SOURCES) $(LIBS)
endif
build/$(ASPROF_HEADER): src/asprof.h
mkdir -p build/include
cp -f $< build/include
build/$(API_JAR): $(API_SOURCES) $(JAR_MANIFEST)
mkdir -p build/api
$(JAVAC) $(JAVAC_OPTIONS) -d build/api $(API_SOURCES)
$(JAR) cfm $@ $(JAR_MANIFEST) -C build/api .
$(RM) -r build/api
build/$(CONVERTER_JAR): $(CONVERTER_SOURCES) $(RESOURCES)
mkdir -p build/converter
$(JAVAC) $(JAVAC_OPTIONS) -d build/converter $(CONVERTER_SOURCES)
$(JAR) cfe $@ one.convert.Main -C build/converter . -C src/res .
$(RM) -r build/converter
%.class: %.java
$(JAVAC) -source $(JAVA_TARGET) -target $(JAVA_TARGET) -Xlint:-options -g:none $^
build/test/cpptests: $(CPP_TEST_SOURCES) $(CPP_TEST_HEADER) $(SOURCES) $(HEADERS) $(RESOURCES) $(JAVA_HELPER_CLASSES)
mkdir -p build/test
ifeq ($(MERGE),true)
for f in src/*.cpp test/native/*.cpp; do echo '#include "'$$f'"'; done |\
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) $(CPP_TEST_INCLUDES) -fPIC -o $@ -xc++ - $(LIBS)
else
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) $(CPP_TEST_INCLUDES) -fPIC -o $@ $(SOURCES) $(CPP_TEST_SOURCES) $(LIBS)
endif
build-test-java: all build/$(TEST_JAR) build/test/build-test-libs build/test/build-test-bins
build-test-cpp: build/test/cpptests build/test/build-test-libs
build-test: build-test-cpp build-test-java
# Build the native shared libraries used by the tests into $(TEST_LIB_DIR).
# The target itself is a stamp file: it is touched at the end so that the
# libraries are rebuilt only when $(TEST_LIB_SOURCES) change.
build/test/build-test-libs: $(TEST_LIB_SOURCES)
@mkdir -p $(TEST_LIB_DIR)
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libcallsmalloc.$(SOEXT) test/native/libs/callsmalloc.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjnimalloc.$(SOEXT) test/native/libs/jnimalloc.c
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libmalloc.$(SOEXT) test/native/libs/malloc.c
# -fno-optimize-sibling-calls disables sibling/tail call optimization for this library.
$(CC) -fno-optimize-sibling-calls -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjninativestacks.$(SOEXT) test/native/libs/jninativestacks.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjninativelocks.$(SOEXT) test/native/libs/jninativelocks.c -lpthread
# Linux only: libraries that are assembled and linked by hand with $(AS)/$(LD),
# using a custom linker script (vaddrdif.ld) or explicit section addresses.
ifeq ($(OS_TAG),linux)
$(CC) -c -shared -fPIC -o $(TEST_LIB_DIR)/vaddrdif.o test/native/libs/vaddrdif.c
$(LD) -N -shared -o $(TEST_LIB_DIR)/libvaddrdif.$(SOEXT) $(TEST_LIB_DIR)/vaddrdif.o -T test/native/libs/vaddrdif.ld
$(AS) -o $(TEST_LIB_DIR)/multiplematching.o test/native/libs/multiplematching.s
$(LD) -shared -o $(TEST_LIB_DIR)/multiplematching.$(SOEXT) $(TEST_LIB_DIR)/multiplematching.o
$(AS) -o $(TEST_LIB_DIR)/twiceatzero.o test/native/libs/twiceatzero.s
$(LD) -shared -o $(TEST_LIB_DIR)/libtwiceatzero.$(SOEXT) $(TEST_LIB_DIR)/twiceatzero.o --section-start=.seg1=0x4000 -z max-page-size=0x1000
endif
@touch $@
# Build the standalone native executables used by the tests into $(TEST_BIN_DIR).
# Like build-test-libs, the target is a stamp file touched at the end.
# Programs that include headers from src/ get -Isrc; those linked with -ldl or
# -lpthread need the dynamic loader or pthread library respectively.
build/test/build-test-bins: $(TEST_BIN_SOURCES)
@mkdir -p $(TEST_BIN_DIR)
$(CC) -o $(TEST_BIN_DIR)/malloc_plt_dyn test/test/nativemem/malloc_plt_dyn.c
$(CC) -o $(TEST_BIN_DIR)/native_api -Isrc test/test/c/native_api.c -ldl
$(CC) -o $(TEST_BIN_DIR)/native_lock_contention test/test/nativelock/native_lock_contention.c -lpthread
$(CC) -o $(TEST_BIN_DIR)/profile_with_dlopen -Isrc test/test/nativemem/profile_with_dlopen.c -ldl
$(CC) -o $(TEST_BIN_DIR)/preload_malloc -Isrc test/test/nativemem/preload_malloc.c -ldl
$(CC) -o $(TEST_BIN_DIR)/nativemem_known_lib_crash -Isrc test/test/nativemem/nativemem_known_lib_crash.c -ldl
# The only C++ program in this list; compiled as C++11 with the profiler and test include paths.
$(CXX) -o $(TEST_BIN_DIR)/non_java_app -std=c++11 $(INCLUDES) $(CPP_TEST_INCLUDES) test/test/nonjava/non_java_app.cpp $(LIBS)
@touch $@
# Run the C++ unit tests. $(TEST_LIB_DIR) is put on both LD_LIBRARY_PATH and
# DYLD_LIBRARY_PATH so the native test libraries can be found at run time.
test-cpp: build-test-cpp
echo "Running cpp tests..."
LD_LIBRARY_PATH="$(TEST_LIB_DIR)" DYLD_LIBRARY_PATH="$(TEST_LIB_DIR)" build/test/cpptests
# Run the Java integration tests through one.profiler.test.Runner with
# assertions enabled (-ea). $(TESTS) is a comma-separated list; $(subst ...)
# turns it into space-separated arguments for the runner.
test-java: build-test-java
echo "Running tests against $(LIB_PROFILER)"
$(TEST_JAVA) $(TEST_FLAGS) -ea -cp "build/$(TEST_JAR):build/jar/*:$(TEST_DEPS_DIR)/*:$(TEST_GEN_DIR)/*" one.profiler.test.Runner $(subst $(COMMA), ,$(TESTS))
# Produce an HTML coverage report for the C++ unit tests.
# FAT_BINARY is forced to false for this target only (target-specific override).
# Steps: clean previous coverage artifacts, re-run 'make test-cpp' with gcov
# instrumentation flags, then let gcovr write coverage/index.html under build/test.
coverage: override FAT_BINARY=false
coverage: clean-coverage
$(MAKE) test-cpp CXXFLAGS_EXTRA="-fprofile-arcs -ftest-coverage -fPIC -O0 --coverage"
mkdir -p build/test/coverage
cd build/test/ && gcovr -r ../.. --html-details --gcov-executable "$(GCOV)" -o coverage/index.html
# "--" ends option parsing so that the pattern starting with '-' is treated as a file name.
# NOTE(review): files named "-.gc*" presumably come from compiling stdin ("-") in MERGE mode - confirm.
rm -rf -- -.gc*
# unit tests shouldn't run if the user selects an integration test target
# TEST_CPP is set to test-cpp only when TESTS is empty, so 'make test TESTS=...'
# depends on test-java alone.
ifeq ($(TESTS),)
TEST_CPP := test-cpp
endif
# Run the C++ unit tests (when selected above) and then the Java tests.
test: $(TEST_CPP) test-java
# Create the directory that holds test dependency jars.
$(TEST_DEPS_DIR):
mkdir -p $@
# Build the test jar. Classes are always compiled from scratch into
# build/test/classes (the directory is wiped first) and then packaged.
# -implicit:none prevents javac from emitting class files for sources it
# loads implicitly; test/stubs is on the class path for compilation only.
build/$(TEST_JAR): build/$(API_JAR) $(TEST_SOURCES) build/$(CONVERTER_JAR) $(TEST_DEPS_DIR)
rm -rf build/test/classes
mkdir -p build/test/classes
$(JAVAC) -source $(JAVA_TARGET) -target $(JAVA_TARGET) -Xlint:-options -XDignore.symbol.file \
-implicit:none \
-cp "build/jar/*:$(TEST_DEPS_DIR)/*:$(TEST_GEN_DIR)/*:test/stubs" \
-d build/test/classes \
$(TEST_SOURCES)
$(JAR) cf $@ -C build/test/classes .
# Regenerate $(TEST_GEN_DIR)/opentelemetry-gen-classes.jar from the
# OpenTelemetry .proto definitions found under $(OTEL_PROTO_PATH).
#
# Steps:
#   1. fail early when OTEL_PROTO_PATH is not set;
#   2. recreate the scratch directories under $(TMP_DIR);
#   3. run protoc on every .proto file except the logs, metrics, trace and
#      *service definitions;
#   4. compile the generated Java sources against the jars in $(TEST_DEPS_DIR);
#   5. package the compiled classes into the jar.
#
# The class path wildcard is quoted so that it reaches javac verbatim. Left
# unquoted, the shell expands it to several file names as soon as the
# directory holds more than one jar, and javac then treats the extra jars as
# source files.
update-otlp-classes-jar:
	@if [ -z "$(OTEL_PROTO_PATH)" ]; then \
		echo "'OTEL_PROTO_PATH' is empty"; \
		exit 1; \
	fi
	rm -rf $(TMP_DIR)/gen/java $(TMP_DIR)/build
	mkdir -p $(TMP_DIR)/gen/java $(TMP_DIR)/build $(TEST_GEN_DIR)
	cd $(OTEL_PROTO_PATH) && protoc --java_out=$(TMP_DIR)/gen/java $$(find . \
		-type f \
		-name '*.proto' \
		-not \( -name 'logs*.proto' -o -name 'metrics*.proto' -o -name 'trace*.proto' -o -name '*service.proto' \))
	$(JAVAC) -source $(JAVA_TARGET) \
		-target $(JAVA_TARGET) \
		-cp "$(TEST_DEPS_DIR)/*" \
		-d $(TMP_DIR)/build \
		-Xlint:-options \
		$$(find $(TMP_DIR)/gen/java -name "*.java")
	$(JAR) cvf $(TEST_GEN_DIR)/opentelemetry-gen-classes.jar -C $(TMP_DIR)/build .
# Sources checked by clang-tidy: every .cpp file directly under src/ and one
# level below it, except rustDemangle.cpp. The backquoted command is stored
# as text and is evaluated by the shell when the recipe runs.
LINT_SOURCES=`ls -1 src/*.cpp src/*/*.cpp | grep -v rustDemangle.cpp`
# Extra clang-tidy arguments; empty by default, can be set on the make command line.
CLANG_TIDY_ARGS_EXTRA=
# Run clang-tidy over $(LINT_SOURCES); everything after "--" is passed to the compiler.
cpp-lint:
clang-tidy $(LINT_SOURCES) $(CLANG_TIDY_ARGS_EXTRA) -- -x c++ $(CXXFLAGS) $(INCLUDES) $(DEFS) $(LIBS)
# Git revision to diff against; empty by default.
DIFF_BASE=
# Run clang-tidy only on changed lines: a zero-context diff (-U0) of the C++
# sources and headers under src/ (excluding rustDemangle.cpp) is piped into
# clang-tidy-diff.py, where -p1 strips the leading path component of the diff.
cpp-lint-diff:
git diff -U0 $(DIFF_BASE) -- 'src/*.cpp' 'src/**/*.cpp' 'src/*.h' 'src/**/*.h' ':!**/rustDemangle.cpp' | \
clang-tidy-diff.py -p1 $(CLANG_TIDY_ARGS_EXTRA) -- -x c++ $(CXXFLAGS) $(INCLUDES) $(DEFS) $(LIBS)
# Check (-c) that README.md and the Markdown files under docs/ are formatted
# according to prettier, without modifying them.
check-md:
prettier -c README.md "docs/**/*.md"
# Reformat the same files in place (-w).
format-md:
prettier -w README.md "docs/**/*.md"
# Remove the C++ test executable and the coverage report so that the next
# coverage run rebuilds both.
clean-coverage:
$(RM) -rf build/test/cpptests build/test/coverage
# Remove the whole build directory.
# NOTE(review): both commands below delete build/, so the second is redundant;
# this looks like the old and new line of a diff shown together - confirm
# which one should remain.
clean:
rm -rf build
$(RM) -r build

498
README.md
View File

@@ -1,465 +1,117 @@
# async-profiler
# Async-profiler
This project is a low overhead sampling profiler for Java
that does not suffer from [Safepoint bias problem](http://psy-lob-saw.blogspot.ru/2016/02/why-most-sampling-java-profilers-are.html).
It features HotSpot-specific APIs to collect stack traces
that does not suffer from the [Safepoint bias problem](http://psy-lob-saw.blogspot.ru/2016/02/why-most-sampling-java-profilers-are.html).
It features HotSpot-specific API to collect stack traces
and to track memory allocations. The profiler works with
OpenJDK, Oracle JDK and other Java runtimes based on HotSpot JVM.
OpenJDK and other Java runtimes based on the HotSpot JVM.
async-profiler can trace the following kinds of events:
- CPU cycles
- Hardware and Software performance counters like cache misses, branch misses, page faults, context switches etc.
- Allocations in Java Heap
- Contended lock attempts, including both Java object monitors and ReentrantLocks
Unlike traditional Java profilers, async-profiler monitors non-Java threads
(e.g., GC and JIT compiler threads) and shows native and kernel frames in stack traces.
## Download
What can be profiled:
Latest release:
- CPU time
- Allocations in Java Heap
- Native memory allocations and leaks
- Contended locks
- Hardware and software performance counters like cache misses, page faults, context switches
- and [more](docs/ProfilingModes.md).
- Linux x64: [async-profiler-1.5-linux-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.5/async-profiler-1.5-linux-x64.tar.gz)
- Linux ARM: [async-profiler-1.5-linux-arm.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.5/async-profiler-1.5-linux-arm.tar.gz)
- macOS x64: [async-profiler-1.5-macos-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.5/async-profiler-1.5-macos-x64.tar.gz)
See our [3 hours playlist](https://www.youtube.com/playlist?list=PLNCLTEx3B8h4Yo_WvKWdLvI9mj1XpTKBr)
to learn about more features.
[Previous releases](https://github.com/jvm-profiling-tools/async-profiler/releases)
# Download
## Supported platforms
### Stable release: [4.3](https://github.com/async-profiler/async-profiler/releases/tag/v4.3)
- **Linux** / x64 / x86 / ARM / AArch64
- **macOS** / x64
- Linux x64: [async-profiler-4.3-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-x64.tar.gz)
- Linux arm64: [async-profiler-4.3-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-arm64.tar.gz)
- macOS arm64/x64: [async-profiler-4.3-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-macos.zip)
- Profile converters: [jfr-converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v4.3/jfr-converter.jar)
Note: macOS profiling is limited to user space code only.
### Nightly builds
## CPU profiling
[The most recent binaries](https://github.com/async-profiler/async-profiler/releases/tag/nightly) corresponding
to the latest successful commit in `master`.
In this mode profiler collects stack trace samples that include **Java** methods,
**native** calls, **JVM** code and **kernel** functions.
For a build corresponding to one of the previous commits, go to
[Nightly Builds](https://github.com/async-profiler/async-profiler/actions/workflows/test-and-publish-nightly.yml),
click the desired build and scroll down to the artifacts section. These binaries are kept for 30 days.
The general approach is receiving call stacks generated by `perf_events`
and matching them up with call stacks generated by `AsyncGetCallTrace`,
in order to produce an accurate profile of both Java and native code.
Additionally, async-profiler provides a workaround to recover stack traces
in some [corner cases](https://bugs.openjdk.java.net/browse/JDK-8178287)
where `AsyncGetCallTrace` fails.
# Quick start
This approach has the following advantages compared to using `perf_events`
directly with a Java agent that translates addresses to Java method names:
* Works on older Java versions because it doesn't require
`-XX:+PreserveFramePointer`, which is only available in JDK 8u60 and later.
* Does not introduce the performance overhead from `-XX:+PreserveFramePointer`,
which can in rare cases be as high as 10%.
* Does not require generating a map file to map Java code addresses to method
names.
* Works with interpreter frames.
* Does not require writing out a perf.data file for further processing in
user space scripts.
## ALLOCATION profiling
Instead of detecting CPU-consuming code, the profiler can be configured
to collect call sites where the largest amount of heap memory is allocated.
async-profiler does not use intrusive techniques like bytecode instrumentation
or expensive DTrace probes which have significant performance impact.
It also does not affect Escape Analysis or prevent JIT optimizations
like allocation elimination. Only actual heap allocations are measured.
The profiler features TLAB-driven sampling. It relies on HotSpot-specific
callbacks to receive two kinds of notifications:
- when an object is allocated in a newly created TLAB;
- when an object is allocated on a slow path outside TLAB.
This means not each allocation is counted, but only allocations every _N_ kB,
where _N_ is the average size of TLAB. This makes heap sampling very cheap
and suitable for production. On the other hand, the collected data
may be incomplete, though in practice it will often reflect the top allocation
sources.
Sampling interval can be adjusted with `-i` option.
For example, `-i 500k` will take one sample after 500 KB of allocated
space on average. However, intervals less than TLAB size will not take effect.
If you want to profile allocations with higher frequency, reduce the TLAB size,
e.g.
```
-XX:MinTLABSize=1 -XX:TLABSize=1 -XX:-ResizeTLAB
```
Unlike Java Mission Control, which uses a similar approach, async-profiler
does not require Java Flight Recorder or any other JDK commercial feature.
It is completely based on open source technologies and it works with OpenJDK.
The minimum supported JDK version is 7u40 where the TLAB callbacks appeared.
Heap profiler requires HotSpot debug symbols. Oracle JDK already has them
embedded in `libjvm.so`, but in OpenJDK builds they are typically shipped
in a separate package. For example, to install OpenJDK debug symbols on
Debian / Ubuntu, run:
```
# apt install openjdk-8-dbg
```
or for OpenJDK 11:
```
# apt install openjdk-11-dbg
```
On Gentoo the `icedtea` OpenJDK package can be built with the per-package setting
`FEATURES="nostrip"` to retain symbols.
### Wall-clock profiling
`-e wall` option tells async-profiler to sample all threads equally every given
period of time regardless of thread status: Running, Sleeping or Blocked.
For instance, this can be helpful when profiling application start-up time.
Wall-clock profiler is most useful in per-thread mode: `-t`.
Example: `./profiler.sh -e wall -t -i 5ms -f result.svg 8983`
## Building
Build status: [![Build Status](https://travis-ci.org/jvm-profiling-tools/async-profiler.svg?branch=master)](https://travis-ci.org/jvm-profiling-tools/async-profiler)
Make sure the `JAVA_HOME` environment variable points to your JDK installation,
and then run `make`. GCC is required. After building, the profiler agent binary
will be in the `build` subdirectory. Additionally, a small application `jattach`
that can load the agent into the target process will also be compiled to the
`build` subdirectory.
## Basic Usage
As of Linux 4.6, capturing kernel call stacks using `perf_events` from a non-
root process requires setting two runtime variables. You can set them using
sysctl or as follows:
In a typical use case, profiling a Java application is just a matter of running `asprof` with the PID of a
running Java process.
```
# echo 1 > /proc/sys/kernel/perf_event_paranoid
# echo 0 > /proc/sys/kernel/kptr_restrict
$ asprof -d 30 -f flamegraph.html <PID>
```
To run the agent and pass commands to it, the helper script `profiler.sh`
is provided. A typical workflow would be to launch your Java application,
attach the agent and start profiling, exercise your performance scenario, and
then stop profiling. The agent's output, including the profiling results, will
be displayed in the Java application's standard output.
The above command translates to: run profiler for 30 seconds and save results to `flamegraph.html`
as an interactive [Flame Graph](docs/FlamegraphInterpretation.md) that can be viewed in a browser.
Example:
[![FlameGraph](/.assets/images/flamegraph.png)](https://htmlpreview.github.io/?https://github.com/async-profiler/async-profiler/blob/master/.assets/html/flamegraph.html)
```
$ jps
9234 Jps
8983 Computey
$ ./profiler.sh start 8983
$ ./profiler.sh stop 8983
```
Find more details in the [Getting started guide](docs/GettingStarted.md).
Alternatively, you may specify `-d` (duration) argument to profile
the application for a fixed period of time with a single command.
# Building
```
$ ./profiler.sh -d 30 8983
```
### Build status
By default, the profiling frequency is 100Hz (every 10ms of CPU time).
Here is a sample of the output printed to the Java application's terminal:
[![Build Status](https://github.com/async-profiler/async-profiler/actions/workflows/test-and-publish-nightly.yml/badge.svg?branch=master)](https://github.com/async-profiler/async-profiler/actions/workflows/test-and-publish-nightly.yml)
```
--- Execution profile ---
Total samples: 687
Unknown (native): 1 (0.15%)
### Minimum requirements
--- 6790000000 (98.84%) ns, 679 samples
[ 0] Primes.isPrime
[ 1] Primes.primesThread
[ 2] Primes.access$000
[ 3] Primes$1.run
[ 4] java.lang.Thread.run
- make
- GCC 7.5.0+ or Clang 7.0.0+
- Static version of libstdc++ (e.g. on Amazon Linux 2023: `yum install libstdc++-static`)
- JDK 11+
... a lot of output omitted for brevity ...
### How to build
ns percent samples top
---------- ------- ------- ---
6790000000 98.84% 679 Primes.isPrime
40000000 0.58% 4 __do_softirq
Make sure `gcc`, `g++` and `java` are available on the `PATH`.
Navigate to the root directory with async-profiler sources and run `make`.
async-profiler launcher will be available at `build/bin/asprof`.
... more output omitted ...
```
Other Makefile targets:
This indicates that the hottest method was `Primes.isPrime`, and the hottest
call stack leading to it comes from `Primes.primesThread`.
- `make test` - run unit and integration tests;
- `make release` - package async-profiler binaries as `.tar.gz` (Linux) or `.zip` (macOS).
## Launching as an Agent
### Supported platforms
If you need to profile some code as soon as the JVM starts up, instead of using the `profiler.sh` script,
it is possible to attach async-profiler as an agent on the command line. For example:
| | Officially maintained builds | Other available ports |
| --------- | ---------------------------- | ----------------------------------------- |
| **Linux** | x64, arm64 | x86, arm32, ppc64le, riscv64, loongarch64 |
| **macOS** | x64, arm64 | |
```
$ java -agentpath:/path/to/libasyncProfiler.so=start,svg,file=profile.svg ...
```
# Documentation
Agent library is configured through the JVMTI argument interface. The format of the arguments string is described [in the source code](https://github.com/jvm-profiling-tools/async-profiler/blob/af94b0e55178c46e17c573a65c498d25b58b641b/src/arguments.cpp#L26). The `profiler.sh` script actually
converts command line arguments to that format.
## Basic usage
For instance, `-e alloc` is converted to `event=alloc`, `-f profile.svg` is converted to `file=profile.svg` and so on. But some arguments are processed directly by the `profiler.sh` script. E.g. `-d 5` results in 3 actions: attaching the profiler agent with the start command, sleeping for 5 seconds, and then attaching the agent again with the stop command.
- [Getting Started](docs/GettingStarted.md)
- [Profiler Options](docs/ProfilerOptions.md)
- [Profiling Modes](docs/ProfilingModes.md)
- [Integrating async-profiler](docs/IntegratingAsyncProfiler.md)
- [Profiling In Container](docs/ProfilingInContainer.md)
## Flame Graph visualization
## Profiler output
async-profiler provides out-of-the-box [Flame Graph](https://github.com/BrendanGregg/FlameGraph) support.
Specify `-o svg` argument to dump profiling results as an interactive SVG
immediately viewable in all mainstream browsers.
Also, SVG output format will be chosen automatically if the target
filename ends with `.svg`.
- [Output Formats](docs/OutputFormats.md)
- [FlameGraph Interpretation](docs/FlamegraphInterpretation.md)
- [JFR Visualization](docs/JfrVisualization.md)
- [Converter Usage](docs/ConverterUsage.md)
- [Heatmap](docs/Heatmap.md)
```
$ jps
9234 Jps
8983 Computey
$ ./profiler.sh -d 30 -f /tmp/flamegraph.svg 8983
```
## Advanced usage
![Example](https://github.com/jvm-profiling-tools/async-profiler/blob/master/demo/SwingSet2.svg)
## Profiler Options
The following is a complete list of the command-line options accepted by
`profiler.sh` script.
* `start` - starts profiling in semi-automatic mode, i.e. profiler will run
until `stop` command is explicitly called.
* `stop` - stops profiling and prints the report.
* `status` - prints profiling status: whether profiler is active and
for how long.
* `list` - show the list of available profiling events. This option still
requires PID, since supported events may differ depending on JVM version.
* `-d N` - the profiling duration, in seconds. If no `start`, `stop`
or `status` option is given, the profiler will run for the specified period
of time and then automatically stop.
Example: `./profiler.sh -d 30 8983`
* `-e event` - the profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc.
Use `list` to see the complete list of available events.
In allocation profiling mode the top frame of every call trace is the class
of the allocated object, and the counter is the heap pressure (the total size
of allocated TLABs or objects outside TLAB).
In lock profiling mode the top frame is the class of lock/monitor, and
the counter is number of nanoseconds it took to enter this lock/monitor.
Two special event types are supported on Linux: hardware breakpoints
and kernel tracepoints:
- `-e mem:<func>[:rwx]` sets read/write/exec breakpoint at function
`<func>`. The format of `mem` event is the same as in `perf-record`.
Execution breakpoints can be also specified by the function name,
e.g. `-e malloc` will trace all calls of native `malloc` function.
- `-e trace:<id>` sets a kernel tracepoint. It is possible to specify
tracepoint symbolic name, e.g. `-e syscalls:sys_enter_open` will trace
all `open` syscalls.
* `-i N` - sets the profiling interval in nanoseconds or in other units,
if N is followed by `ms` (for milliseconds), `us` (for microseconds)
or `s` (for seconds). Only CPU active time is counted. No samples
are collected while CPU is idle. The default is 10000000 (10ms).
Example: `./profiler.sh -i 500us 8983`
* `-j N` - sets the Java stack profiling depth. This option will be ignored if N is greater
than default 2048.
Example: `./profiler.sh -j 30 8983`
* `-b N` - sets the frame buffer size, in the number of Java
method ids that should fit in the buffer. If you receive messages about an
insufficient frame buffer size, increase this value from the default.
Example: `./profiler.sh -b 5000000 8983`
* `-t` - profile threads separately. Each stack trace will end with a frame
that denotes a single thread.
Example: `./profiler.sh -t 8983`
* `-s` - print simple class names instead of FQN.
* `-g` - print method signatures.
* `-a` - annotate Java method names by adding `_[j]` suffix.
* `-o fmt[,fmt...]` - specifies what information to dump when profiling ends.
This is a comma-separated list of the following options:
- `summary` - dump basic profiling statistics;
- `traces[=N]` - dump call traces (at most N samples);
- `flat[=N]` - dump flat profile (top N hot methods);
- `jfr` - dump events in Java Flight Recorder format readable by Java Mission Control.
This *does not* require JDK commercial features to be enabled.
- `collapsed[=C]` - dump collapsed call traces in the format used by
[FlameGraph](https://github.com/brendangregg/FlameGraph) script. This is
a collection of call stacks, where each line is a semicolon separated list
of frames followed by a counter.
- `svg[=C]` - produce Flame Graph in SVG format.
- `tree[=C]` - produce call tree in HTML format.
--reverse option will generate backtrace view.
`C` is a counter type:
- `samples` - the counter is a number of samples for the given trace;
- `total` - the counter is a total value of collected metric, e.g. total allocation size.
The default format is `summary,traces=200,flat=200`.
* `--title TITLE`, `--width PX`, `--height PX`, `--minwidth PX`, `--reverse` - FlameGraph parameters.
Example: `./profiler.sh -f profile.svg --title "Sample CPU profile" --minwidth 0.5 8983`
* `-f FILENAME` - the file name to dump the profile information to.
Example: `./profiler.sh -o collapsed -f /tmp/traces.txt 8983`
* `--all-user` - include only user-mode events. This option is helpful when kernel profiling
is restricted by `perf_event_paranoid` settings.
`--all-kernel` is its counterpart option for including only kernel-mode events.
* `-v`, `--version` - prints the version of profiler library. If PID is specified,
gets the version of the library loaded into the given process.
## Profiling Java in a container
It is possible to profile Java processes running in a Docker or LXC container
both from within a container and from the host system.
When profiling from the host, `pid` should be the Java process ID in the host
namespace. Use `ps aux | grep java` or `docker top <container>` to find
the process ID.
async-profiler should be run from the host by a privileged user - it will
automatically switch to the proper pid/mount namespace and change
user credentials to match the target process. Also make sure that
the target container can access `libasyncProfiler.so` by the same
absolute path as on the host.
By default, Docker container restricts the access to `perf_event_open`
syscall. So, in order to allow profiling inside a container, you'll need
to modify [seccomp profile](https://docs.docker.com/engine/security/seccomp/)
or disable it altogether with `--security-opt=seccomp:unconfined` option.
Alternatively, if changing Docker configuration is not possible,
you may fall back to `-e itimer` profiling mode, see [Troubleshooting](#troubleshooting).
## Restrictions/Limitations
* On most Linux systems, `perf_events` captures call stacks with a maximum depth
of 127 frames. On recent Linux kernels, this can be configured using
`sysctl kernel.perf_event_max_stack` or by writing to the
`/proc/sys/kernel/perf_event_max_stack` file.
* Profiler allocates 8kB perf_event buffer for each thread of the target process.
Make sure `/proc/sys/kernel/perf_event_mlock_kb` value is large enough
(more than `8 * threads`) when running under unprivileged user.
Otherwise the message _"perf_event mmap failed: Operation not permitted"_
will be printed, and no native stack traces will be collected.
* There is no bullet-proof guarantee that the `perf_events` overflow signal
is delivered to the Java thread in a way that guarantees no other code has run,
which means that in some rare cases, the captured Java stack might not match
the captured native (user+kernel) stack.
* You will not see the non-Java frames _preceding_ the Java frames on the
stack. For example, if `start_thread` called `JavaMain` and then your Java
code started running, you will not see the first two frames in the resulting
stack. On the other hand, you _will_ see non-Java frames (user and kernel)
invoked by your Java code.
* No Java stacks will be collected if `-XX:MaxJavaStackTraceDepth` is zero
or negative.
* Too short profiling interval may cause continuous interruption of heavy
system calls like `clone()`, so that it will never complete;
see [#97](https://github.com/jvm-profiling-tools/async-profiler/issues/97).
The workaround is simply to increase the interval.
* When agent is not loaded at JVM startup (by using -agentpath option) it is
highly recommended to use `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` JVM flags.
Without those flags the profiler will still work correctly but results might be
less accurate e.g. without `-XX:+DebugNonSafepoints` there is a high chance that simple inlined methods will not appear in the profile. When agent is attached at runtime `CompiledMethodLoad` JVMTI event
enables debug info, but only for methods compiled after the event is turned on.
- [CPU Sampling Engines](docs/CpuSamplingEngines.md)
- [Stack Walking Modes](docs/StackWalkingModes.md)
- [Advanced Stacktrace Features](docs/AdvancedStacktraceFeatures.md)
- [Profiling Non-Java Applications](docs/ProfilingNonJavaApplications.md)
## Troubleshooting
```
Failed to change credentials to match the target process: Operation not permitted
```
Due to limitation of HotSpot Dynamic Attach mechanism, the profiler must be run
by exactly the same user (and group) as the owner of target JVM process.
If profiler is run by a different user, it will try to automatically change
current user and group. This will likely succeed for `root`, but not for
other users, resulting in the above error.
```
Could not start attach mechanism: No such file or directory
```
The profiler cannot establish communication with the target JVM through UNIX domain socket.
Usually this happens in one of the following cases:
1. Attach socket `/tmp/.java_pidNNN` has been deleted. It is a common
practice to clean `/tmp` automatically with some scheduled script.
Configure the cleanup software to exclude `.java_pid*` files from deletion.
How to check: run `lsof -p PID | grep java_pid`
If it lists a socket file, but the file does not exist, then this is exactly
the described problem.
2. JVM is started with `-XX:+DisableAttachMechanism` option.
3. `/tmp` directory of Java process is not physically the same directory
as `/tmp` of your shell, because Java is running in a container or in
`chroot` environment. `jattach` attempts to solve this automatically,
but it might lack the required permissions to do so.
Check `strace build/jattach PID properties`
4. JVM is busy and cannot reach a safepoint. For instance,
JVM is in the middle of long-running garbage collection.
How to check: run `kill -3 PID`. Healthy JVM process should print
a thread dump and heap info in its console.
```
Failed to inject profiler into <pid>
```
The connection with the target JVM has been established, but JVM is unable to load profiler shared library.
Make sure the user of JVM process has permissions to access `libasyncProfiler.so` by exactly the same absolute path.
For more information see [#78](https://github.com/jvm-profiling-tools/async-profiler/issues/78).
```
Perf events unavailble. See stderr of the target process.
```
`perf_event_open()` syscall has failed. The error message is printed to the error stream
of the target JVM.
Typical reasons include:
1. `/proc/sys/kernel/perf_event_paranoid` is set to restricted mode (>=2).
2. seccomp disables perf_event_open API in a container.
3. OS runs under a hypervisor that does not virtualize performance counters.
4. perf_event_open API is not supported on this system, e.g. WSL.
If changing the configuration is not possible, you may fall back to
`-e itimer` profiling mode. It is similar to `cpu` mode, but does not
require perf_events support. As a drawback, there will be no kernel
stack traces.
```
No AllocTracer symbols found. Are JDK debug symbols installed?
```
It might be needed to install the package with OpenJDK debug symbols.
See [Allocation profiling](#allocation-profiling) for details.
Note that allocation profiling is not supported on JVMs other than HotSpot, e.g. Zing.
```
VMStructs unavailable. Unsupported JVM?
```
JVM shared library does not export `gHotSpotVMStructs*` symbols -
apparently this is not a HotSpot JVM. Sometimes the same message
can be also caused by an incorrectly built JDK
(see [#218](https://github.com/jvm-profiling-tools/async-profiler/issues/218)).
In these cases installing JDK debug symbols may solve the problem.
```
[frame_buffer_overflow]
```
This message in the output means there was not enough space to store all call traces.
Consider increasing frame buffer size with `-b` option.
For known issues faced while running async-profiler and their detailed troubleshooting,
please refer to the [Troubleshooting guide](docs/Troubleshooting.md).

9
SECURITY.md Normal file
View File

@@ -0,0 +1,9 @@
## Reporting Security Issues
We take all security reports seriously.
When we receive such reports,
we will investigate and subsequently address
any potential vulnerabilities as quickly as possible.
If you discover a potential security issue in this project,
please notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/).
Please do *not* create a public GitHub issue in this project.

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 141 KiB

View File

@@ -0,0 +1,3 @@
# Alpaquita Linux (musl) image with GCC 15.2, extended with the Liberica
# JDK 21, util-linux-misc and curl packages.
FROM public.ecr.aws/bellsoft/alpaquita-linux-gcc:15.2-musl
RUN apk add --no-cache liberica21-jdk util-linux-misc curl

3
docker/alpine.Dockerfile Normal file
View File

@@ -0,0 +1,3 @@
# Alpine image with the Amazon Corretto 11 JDK, extended with the native
# build toolchain (make, gcc, g++, headers), patchelf, gcovr and basic
# shell utilities.
FROM public.ecr.aws/docker/library/amazoncorretto:11-alpine-jdk
RUN apk add --no-cache make gcc g++ linux-headers musl-dev util-linux patchelf gcovr bash tar curl

View File

@@ -0,0 +1,37 @@
# Stage 0: build Node.js from source on Amazon Linux 2 using the gcc10 toolchain.
FROM public.ecr.aws/amazonlinux/amazonlinux:2
RUN amazon-linux-extras enable python3.8
RUN yum update -y && yum install -y git make python38 gcc10 gcc10-c++ binutils tar
# Pinned Node.js version and the SHA-256 checksum of its source archive.
ARG node_version=20.19.1
ARG node_sha256=babcd5b9e3216510b89305e6774bcdb2905ca98ff60028b67f163eb8296b6665
RUN curl -L --output node.tar.gz https://github.com/nodejs/node/archive/refs/tags/v${node_version}.tar.gz
# Abort the build if the downloaded archive does not match the expected checksum.
RUN echo ${node_sha256} node.tar.gz | sha256sum -c
RUN mkdir /node
RUN tar xf node.tar.gz -C /node --strip-components=1
WORKDIR /node
ENV CC=gcc10-cc
ENV CXX=gcc10-c++
RUN ./configure
# Build output is discarded (-s and stdout to /dev/null); errors still go to stderr.
RUN make -j4 -s > /dev/null
RUN make install
# Final stage: a fresh Amazon Linux 2 image that takes only the node binary
# from stage 0 and installs the build toolchain, Corretto 11, patchelf and gcovr.
FROM public.ecr.aws/amazonlinux/amazonlinux:2
COPY --from=0 /usr/local/bin/node /usr/local/bin/node
RUN amazon-linux-extras enable python3.8 && \
yum update -y && \
yum install -y gcc-c++ binutils make java-11-amazon-corretto patchelf tar python38 && \
yum clean all && \
rm -rf /var/cache/yum && \
python -m ensurepip && \
python -m pip install gcovr
# NOTE(review): /__e/node20 is presumably where the CI runner looks for its Node 20 binary - confirm.
ENV NODE_JS_LOCATION=/__e/node20
# Write /root/setup.sh, a script that symlinks the locally built node binary
# into $NODE_JS_LOCATION/bin. The heredoc delimiter is unquoted, so
# $NODE_JS_LOCATION is expanded by the shell while the file is being written.
RUN cat <<EOF > /root/setup.sh
#!/bin/sh
mkdir -p "$NODE_JS_LOCATION/bin"
ln --force --symbolic "/usr/local/bin/node" "$NODE_JS_LOCATION/bin/node"
EOF

View File

@@ -0,0 +1,8 @@
# Amazon Linux 2023 image with the native build toolchain and gcovr.
# Package caches are removed in the same layer to keep the image small.
# Note that no JDK is installed in this image.
FROM public.ecr.aws/amazonlinux/amazonlinux:2023
RUN yum update -y && \
yum install -y binutils findutils make tar gcc-c++ util-linux && \
yum clean all && \
rm -rf /var/cache/yum && \
python3 -m ensurepip && \
python3 -m pip install gcovr

View File

@@ -0,0 +1,10 @@
# Image for all tasks related to static code analysis in async-profiler
FROM public.ecr.aws/docker/library/amazoncorretto:11-alpine-jdk
ADD --chmod=555 https://raw.githubusercontent.com/llvm/llvm-project/67be4fe3d5fd986a3149de3806bcf2c92320015e/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py /usr/bin/
RUN apk add --no-cache clang-extra-tools linux-headers make python3 git py3-pip bash
# Needed by clang-tidy-diff.py to merge multiple results in one file.
# '--break-system-packages' is needed because Alpine does not like other package managers than 'apk' ('pip' in this case) to install
# software globally, but it's safe to do in this case.
RUN pip install --break-system-packages pyyaml
ENV CPLUS_INCLUDE_PATH="/usr/lib/jvm/java-11-amazon-corretto/include:/usr/lib/jvm/java-11-amazon-corretto/include/linux"

32
docker/debian.Dockerfile Normal file
View File

@@ -0,0 +1,32 @@
# Image for building async-profiler release packages
# Stage 0: download and build musl
FROM public.ecr.aws/debian/debian:10-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
sudo libicu-dev patchelf curl make g++ openjdk-11-jdk-headless gcovr && \
rm -rf /var/cache/apt /var/lib/apt/lists/*
ARG musl_src=musl-1.2.5
ARG musl_sha256=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
ADD https://musl.libc.org/releases/${musl_src}.tar.gz /
RUN echo ${musl_sha256} ${musl_src}.tar.gz | sha256sum -c
RUN ["/bin/bash", "-c", "\
tar xfz ${musl_src}.tar.gz && \
cd /${musl_src} && \
./configure --disable-shared --prefix=/usr/local/musl && \
make -j`nproc` && make install && make clean && \
ln -s /usr/include/$(arch)-linux-gnu/asm /usr/include/{asm-generic,linux} /usr/local/musl/include/"]
# Stage 1: install build tools + copy musl toolchain from the previous step
FROM public.ecr.aws/debian/debian:10-slim
# The following command should be exactly the same as at stage 0 to benefit from caching.
# libicu-dev is needed for the github actions runner
RUN apt-get update && apt-get install -y --no-install-recommends \
sudo libicu-dev patchelf curl make g++ openjdk-11-jdk-headless gcovr && \
rm -rf /var/cache/apt /var/lib/apt/lists/*
COPY --from=0 /usr/local/musl /usr/local/musl

View File

@@ -0,0 +1,35 @@
# Advanced Stacktrace Features
## Display JIT compilation task
Async-profiler samples JIT compiler threads just the same way as Java threads, and hence can show
CPU percentage spent on JIT compilation. At the same time, Java methods are different:
some take more resources to compile, others take less. Furthermore, there are cases when
a bug in C2 compiler causes a JIT thread to get stuck in an infinite loop consuming 100% CPU.
Async-profiler can highlight which particular Java methods take most CPU time to compile.
![](/.assets/images/comptask_feature.png)
The feature can be enabled with the option `-F comptask` (or its agent equivalent `features=comptask`).
## Display actual implementation in vtable
In some applications, a significant amount of CPU time is spent on dispatching megamorphic virtual/interface calls.
async-profiler shows a pseudo-frame on top of v/itable stub with the actual type of object the virtual method is
called on. This should make clear the proportion of different receivers for the particular call site.
![](/.assets/images/vtable_feature.png)
The feature can be enabled with the option `-F vtable` (or its agent equivalent `features=vtable`).
## Display instruction addresses
Sometimes, for low-level performance analysis, it is important to know where exactly
CPU time is spent inside a method. As an intermediate step to the instruction-level
profiling, async-profiler provides an option to record PC address of the currently
running method for each execution sample. In this case, each stack trace will include
a synthetic frame with the address at the top of every stack trace.
![](/.assets/images/pcaddr_feature.png)
The feature can be enabled with the option `-F pcaddr` (or its agent equivalent `features=pcaddr`).

177
docs/ConverterUsage.md Normal file
View File

@@ -0,0 +1,177 @@
# Converter Usage
async-profiler provides `jfrconv` utility to convert between different profile output formats.
`jfrconv` can be found at the same location as the `asprof` binary. Converter is also available
as a standalone Java application: [`jfr-converter.jar`](https://github.com/async-profiler/async-profiler/releases/latest/download/jfr-converter.jar).
## Supported conversions
The tool can convert several source formats into various outputs. The conversion capabilities are summarized below:
| Source format | to html | to collapsed | to pprof | to pb.gz | to heatmap | to otlp |
| ------------- | ------- | ------------ | -------- | -------- | ---------- | ------- |
| jfr | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| html | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| collapsed | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
## Usage
```
jfrconv [options] <input> [<input>...] <output>
```
Only one output format can be specified per conversion.
```
Conversion options:
-o --output FORMAT, -o can be omitted if the output file extension unambiguously determines the format, e.g. profile.collapsed
FORMAT can be any of the following:
# collapsed: This is a collection of call stacks, where each line is a semicolon separated
list of frames followed by a counter. This is used by the FlameGraph script to
generate the FlameGraph visualization of the profile data.
# html: FlameGraph is a hierarchical representation of call traces of the profiled
software in a color coded format that helps to identify a particular resource
usage like CPU and memory for the application.
# pprof: pprof is a profiling visualization and analysis tool from Google. More details on
pprof on the official github page https://github.com/google/pprof.
# pb.gz: This is a compressed version of pprof output.
# heatmap: A single page interactive heatmap that allows to explore profiling events
on a timeline.
# otlp: OpenTelemetry profile format.
Differential Flame Graph:
--diff <base-profile> <new-profile>
JFR options:
--cpu Generate only CPU profile during conversion
--cpu-time Generate only CPU profile, using CPUTimeSample events
--wall Generate only Wall clock profile during conversion
--alloc Generate only Allocation profile during conversion
--live Build allocation profile from live objects only during conversion
--nativemem Generate native memory allocation profile
--leak Only include memory leaks in nativemem
--tail RATIO Ignore tail allocations for leak profiling (10% by default)
--lock Generate only lock contention profile during conversion
--nativelock Generate only native (pthread) lock contention profile
--trace Convert only MethodTrace events
-t --threads Split stack traces by threads
-s --state LIST Filter thread states: runnable, sleeping, default. State name is case insensitive
and can be abbreviated, e.g. -s r
--classify Classify samples into predefined categories
--total Accumulate total value (time, bytes, etc.) instead of samples
--lines Show line numbers
--bci Show bytecode indices
--simple Simple class names instead of fully qualified names
--norm Normalize names of hidden classes/lambdas, e.g. Original JFR transforms
lambda names to something like pkg.ClassName$$Lambda+0x00007f8177090218/543846639
which gets normalized to pkg.ClassName$$Lambda
--dot Dotted class names, e.g. java.lang.String instead of java/lang/String
--from TIME Start time in ms (absolute or relative)
--to TIME End time in ms (absolute or relative)
TIME can be:
# an absolute timestamp specified in millis since epoch;
# an absolute time in hh:mm:ss or yyyy-MM-dd'T'hh:mm:ss format;
# a relative time from the beginning of recording;
# a relative time from the end of recording (a negative number).
--latency MS Retain only samples within MethodTraces of at least MS milliseconds
Flame Graph options:
--title STRING Convert to Flame Graph with provided title
--minwidth X Skip frames smaller than X%
--grain X Coarsen Flame Graph to the given grain size
--skip N Skip N bottom frames
-r --reverse Reverse stack traces (defaults to icicle graph)
-i --inverted Toggles the layout for reversed stacktraces from icicle to flamegraph
and for default stacktraces from flamegraph to icicle
-I --include REGEX Include only stacks with the specified frames, e.g. -I 'MyApplication\.main' -I 'VMThread.*'
-X --exclude REGEX Exclude stacks with the specified frames, e.g. -X '.*pthread_cond_(wait|timedwait).*'
--highlight REGEX Highlight frames matching the given pattern
```
See the [profiler options documentation](ProfilerOptions.md#options-applicable-to-flamegraph-and-tree-view-outputs-only) for details on the `--reverse` and `--inverted` options.
## jfrconv examples
`jfrconv` utility is provided in `bin` directory of the async-profiler package.
It requires JRE to be installed on the system.
### Generate Flame Graph from JFR
If no output file is specified, it defaults to a Flame Graph output.
```
jfrconv foo.jfr
```
Profiling in JFR mode allows multi-mode profiling. So the command above will generate a Flame Graph
output, however, for a multi-mode profile output with both `cpu` and `wall-clock` events, the
Flame Graph will have an aggregation of both in the view. Such a view wouldn't make much sense and
hence it is advisable to use JFR conversion filter options like `--cpu` to filter out events
during a conversion.
```
jfrconv --cpu foo.jfr
# which is equivalent to:
# jfrconv --cpu -o html foo.jfr foo.html
```
The output is HTML because HTML is the default format for conversion from JFR.
### Flame Graph options
To add a custom title to the generated Flame Graph, use `--title`, which has the default value `Flame Graph`:
```
jfrconv --cpu foo.jfr foo.html -r --title "Custom Title"
```
### Differential Flame Graph
To find performance regressions, it may be useful to compare current profile
to a previous one that serves as a baseline. Differential Flame Graph
visualizes such a comparison with a special color scheme:
- Red color denotes frames with more samples compared to the baseline (i.e. regression);
- Blue is for frames with fewer samples;
- Yellow are new frames that were absent in the baseline.
The more intense the color, the larger the delta.
For each different frame, the delta value is displayed in a tooltip.
![](/.assets/images/flamegraph_diff.png)
Differential Flame Graph takes the shape of the current profile:
all frames have exactly the same size as in the normal Flame Graph.
This means, frames that exist only in the base profile will not be visible.
To see such frames, create another differential Flame Graph,
swapping the base and the current input file.
To create differential Flame Graph, run `jfrconv --diff` with two input files:
baseline profile and new profile. Both files can be in JFR, HTML, or collapsed format.
Other converter options work as usual.
```
jfrconv --cpu --diff baseline.jfr new.jfr diff.html
```
Output file name is optional. If omitted, `jfrconv` takes the name
of the second input file, replacing its extension with `.diff.html`.
## Standalone converter examples
Standalone converter jar is provided in
[Download](https://github.com/async-profiler/async-profiler/?tab=readme-ov-file#Download).
It accepts the same parameters as `jfrconv`.
Below is an example usage:
```
java -jar jfr-converter.jar --cpu foo.jfr --reverse --title "Application CPU profile"
```

View File

@@ -0,0 +1,76 @@
# CPU Sampling Engines
Async-profiler has three options for CPU profiling: `-e cpu`, `-e itimer` and `-e ctimer`.
## cpu
`cpu` mode measures CPU time spent by the running threads. For example,
if an application uses 2 cpu cores, each with 30% utilization, and the sampling interval is
10ms, then the profiler will collect about `2 * 0.3 * 100 = 60` samples per second.
In other words, 1 profiling sample means that one CPU core was actively running for N nanoseconds,
where N is the profiling interval.
On Linux, `cpu` mode relies on [perf_events](https://man7.org/linux/man-pages/man2/perf_event_open.2.html).
One `perf_event` descriptor is created for each running thread and configured to generate a signal
every `N` nanoseconds of CPU time. This is the most accurate CPU sampler available in async-profiler
and the only one that can obtain kernel stack traces. It, however, comes with certain restrictions.
Most importantly, OS configuration may limit access to `perf_events` API, e.g.,
by `kernel.perf_event_paranoid` sysctl or by seccomp (which is often the case in a Docker container).
If `perf_events` are available, but kernel symbols are hidden (e.g., by `kernel.kptr_restrict` setting),
async-profiler continues to use `perf_events`, emits a warning and does not show kernel stack traces.
Another important thing to consider is that `cpu` sampling engine allocates a descriptor per thread.
This means, if an application has too many threads and OS limit for the maximum number of open descriptors
(`ulimit -n`) is too low, an application may run out of file descriptors. The workaround
is to simply increase file descriptor limit.
## itimer
`itimer` mode is based on [setitimer(ITIMER_PROF)](https://man7.org/linux/man-pages/man2/setitimer.2.html)
syscall, which ideally generates a signal every given interval of CPU time consumed by the process.
Ideally, both `itimer` and `cpu` should collect the same number of samples. Typically,
profiles indeed look very similar. However, in [some cases](https://github.com/golang/go/issues/14434),
`cpu` profile appears more accurate, since a signal is delivered exactly to the thread
that overflowed a hardware counter. In contrast, `itimer` has the following limitations:
- Only one `itimer` signal can be delivered to a process at a time.
- Signals are not distributed evenly between running threads.
- Sampling resolution is limited by the size of [jiffies](https://man7.org/linux/man-pages/man7/time.7.html).
`itimer` profiles may be even less accurate on macOS, where `itimer` signals are often biased
towards system calls.
The main advantage of `itimer` is that it works in containers and does not consume file descriptors.
## ctimer
`ctimer` is a Linux-specific alternative for `cpu` profiling mode to overcome limitations
of `perf_events`, such as `perf_event_paranoid` setting, seccomp restriction or a low limit
for the number of open file descriptors. `ctimer` mode relies on
[timer_create](https://man7.org/linux/man-pages/man2/timer_create.2.html) API.
It combines benefits of `-e cpu` and `-e itimer`, except that it does not allow collecting kernel stacks.
Like with `itimer`, `ctimer` resolution is limited by the size of the jiffy -
kernel `HZ` constant, which is typically equal to 100 or 250, meaning that the minimum supported
profiling interval is 10ms or 4ms respectively.
## Summary
Here is a summary of advantages and drawbacks of all CPU profiling engines:
| Attribute | cpu (perf_events) | itimer | ctimer |
| --------------------------------- | :---------------: | :----: | :----: |
| Can collect kernel stack traces | ✅ | ❌ | ❌ |
| High resolution | ✅ | ❌ | ❌ |
| Accuracy / fairness | ✅ | ❌ | 🆗 |
| Works in containers by default | ❌ | ✅ | ✅ |
| Does not consume file descriptors | ❌ | ✅ | ✅ |
| macOS support | ❌ | ✅ | ❌ |
When using `-e cpu` on Linux, async-profiler automatically checks for `perf_events` availability
by trying to create a dummy perf_event. If kernel-space profiling is not available,
async-profiler transparently falls back to `ctimer` mode. To force using `perf_events`
for user-space only profiling, specify `-e cpu-clock --all-user` instead of `-e cpu`.
The actual profiling engine (`perf_events`, `ctimer`, etc.) is now recorded in `jfr` output.

View File

@@ -0,0 +1,85 @@
# FlameGraph interpretation
To interpret a flame graph, the best way forward is to understand how it is created.
## Example application to profile
Let's take the below example:
```
main() {
// some business logic
func3() {
// some business logic
func7();
}
// some business logic
func4();
// some business logic
func1() {
// some business logic
func5();
}
// some business logic
func2() {
// some business logic
func6() {
// some business logic
func8(); // cpu intensive work here
}
}
}
```
## Profiler sampling
Profiling starts by taking samples `X` times per second. Whenever a sample is taken,
the current call stack for it is saved. The diagram below shows the unsorted sampling view
before the sorting and aggregation takes place.
![](/.assets/images/ProfilerSamplings.png)
Below are the sampling numbers:
- `func3()->func7()`: 3 samples
- `func4()`: 1 sample
- `func1()->func5()`: 2 samples
- `func2()->func6()->func8()`: 4 samples
- `func2()->func6()`: 1 sample
## Sorting samples
Samples are then alphabetically sorted at the base level just after root (or main method) of the application.
![](/.assets/images/SortedSamplings.png)
Note that X-axis is no longer a timeline. Flame graph does not preserve information
on _when_ a particular stack trace was taken, it only indicates _how often_
a stack trace was observed during profiling.
## Aggregated view
The blocks for the same functions at each level of stack depth are then stitched together
to get an aggregated view of the flame graph.
![](/.assets/images/AggregatedView.png)
In this example, except `func4()`, no other function actually consumes
any resource at the base level of stack depth. `func5()`, `func6()`,
`func7()` and `func8()` are the ones consuming resources, with `func8()`
being a likely candidate for performance optimization.
CPU utilization is the most common use case for flame graphs, however,
there are other modes of profiling like allocation profiling to view
heap utilization and wall-clock profiling to view latency.
[More on various modes of profiling](ProfilingModes.md)
## Understanding FlameGraph colors
Color is another flame graph dimension that may be used to encode additional information
about each frame. Colors may have different meaning in various flame graph implementations.
async-profiler uses the following palette to differentiate frame types:
![](/.assets/images/flamegraph_colors.png)

110
docs/GettingStarted.md Normal file
View File

@@ -0,0 +1,110 @@
# Getting started guide
## Before profiling
As of Linux 4.6, capturing kernel call stacks using `perf_events` from a non-root
process requires setting two kernel parameters. You can set them using sysctl as follows:
```
# sysctl kernel.perf_event_paranoid=1
# sysctl kernel.kptr_restrict=0
```
For better profiling accuracy, it is [recommended](Troubleshooting.md#known-limitations)
to start the JVM with `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` flags,
unless async-profiler is loaded at JVM startup.
## Find a process to profile
Common ways to find the target process include using
[`jps`](https://docs.oracle.com/en/java/javase/21/docs/specs/man/jps.html) and
[`pgrep`](https://man7.org/linux/man-pages/man1/pgrep.1.html).
For example, to list all Java process IDs with their full command lines, run
`pgrep -a java`. The next section includes an example using `jps`.
## Start profiling
async-profiler works in the context of the target Java application,
i.e. it runs as an agent in the process being profiled.
`asprof` is a tool to attach and control the agent.
A typical workflow would be to launch your Java application, attach
the agent and start profiling, exercise your performance scenario, and
then stop profiling. The agent's output, including the profiling results, will
be displayed on the console where you've started `asprof`.
Example:
```
$ jps
9234 Jps
8983 Computey
$ asprof start 8983
$ asprof stop 8983
```
The following may be used in lieu of the `pid` (8983):
- The keyword `jps`, which will find `pid` automatically, if there is a single Java process running in the system.
- The application name as it appears in the `jps` output: e.g. `Computey`
Alternatively, you may specify `-d` (duration) argument to profile
the application for a fixed period of time with a single command.
```
$ asprof -d 30 8983
```
By default, the profiling frequency is 100Hz (every 10ms of CPU time).
Here is a sample output of `asprof`:
```
--- Execution profile ---
Total samples: 687
Unknown (native): 1 (0.15%)
--- 6790000000 (98.84%) ns, 679 samples
[ 0] Primes.isPrime
[ 1] Primes.primesThread
[ 2] Primes.access$000
[ 3] Primes$1.run
[ 4] java.lang.Thread.run
... a lot of output omitted for brevity ...
ns percent samples top
---------- ------- ------- ---
6790000000 98.84% 679 Primes.isPrime
40000000 0.58% 4 __do_softirq
... more output omitted ...
```
This indicates that the hottest method was `Primes.isPrime`, and the hottest
call stack leading to it comes from `Primes.primesThread`.
## Other use cases
- [Launching as an agent](IntegratingAsyncProfiler.md#launching-as-an-agent)
- [Java API](IntegratingAsyncProfiler.md#using-java-api)
- [IntelliJ IDEA](IntegratingAsyncProfiler.md#intellij-idea)
## FlameGraph visualization
async-profiler provides out-of-the-box [Flame Graph](https://www.brendangregg.com/flamegraphs.html) support.
Specify `-o flamegraph` argument to dump profiling results as an interactive HTML Flame Graph.
Also, Flame Graph output format will be chosen automatically if the target filename ends with `.html`.
```
$ jps
9234 Jps
8983 Computey
$ asprof -d 30 -f /tmp/flamegraph.html 8983
```
[![Example](/.assets/images/flamegraph.png)](https://htmlpreview.github.io/?https://github.com/async-profiler/async-profiler/blob/master/.assets/html/flamegraph.html)
The flame graph html can be opened in any browser of your choice for further interpretation.
Please refer to [Interpreting a Flame Graph](FlamegraphInterpretation.md)
to understand more on how to interpret a Flame Graph.

94
docs/Heatmap.md Normal file
View File

@@ -0,0 +1,94 @@
# Heatmap
Problems to be solved with a profiler can be divided into two large categories:
1. Optimization of overall resource usage.
2. Troubleshooting of intermittent performance issues.
While flame graphs are handy for the first type of problems, they are not very helpful
for analyzing transient anomalies because they provide an aggregated view that lacks
any timeline information. To address the second type of problems, async-profiler offers
a converter from JFR format to an interactive heatmap in the form of a single-page HTML file.
Heatmap is an alternative representation of profile data that preserves timestamps
of particular samples. Essentially, it's a two-dimensional timeline composed of
colored blocks. Each block represents a short period of time (usually in the range of
milliseconds to seconds) with its color being the third dimension: the more intense
the color, the more events happened in a given period of time.
![](/.assets/images/heatmap.png)
The idea of heatmaps was borrowed from [FlameScope](https://github.com/Netflix/flamescope),
however, FlameScope targets short profiling intervals up to a few minutes, whereas
async-profiler implementation is capable of visualizing 24-hour recordings
with the granularity of 20 milliseconds. Moreover, heatmaps produced by async-profiler
are serverless: they are standalone self-contained HTML files that can be easily shared
and viewed without additional software besides a browser.
## Heatmap features
### Whole day profile
Heatmaps are optimized for information density. Full day of continuous profiling
can be presented on a single image, where an engineer can spot regular activity
patterns as well as anomalies at a glance.
Heatmaps are also optimized for footprint. Specialized compression algorithms
can pack 1 GB original JFR recording to an HTML page of 10-15 MB in size.
![](/.assets/images/heatmap1.png)
### Scale / zoom
Depending on the recording duration and level of detail you are interested in,
you can switch between 3 available scales. On the largest scale, each vertical line
represents 5 minutes of wall clock time, with each square corresponding to
a 5-second interval. On the finest scale, each square corresponds to 20 milliseconds,
allowing you to analyze profiling samples with a high resolution.
![](/.assets/images/heatmap2.png)
### Instant flame graphs
A click on any heatmap square displays a flame graph for this specific time interval.
![](/.assets/images/heatmap3.png)
Hold mouse button to select an arbitrary time range on a heatmap.
A flame graph for the given time range will be built automatically.
![](/.assets/images/heatmap4.png)
### Compare time ranges
Select target time range as described above. Holding `Ctrl` key,
move mouse pointer to choose another time range that will serve as a baseline.
You will then get a differential flame graph highlighting stacks
that were seen more often in the target time range compared to the baseline.
![](/.assets/images/heatmap5.png)
### Search
Press `Ctrl+F` and enter a regex to search on the entire heatmap.
Time intervals containing matched stacks will be highlighted on a heatmap in blue.
Matching frames, if any, will be also highlighted on a flame graph.
`Ctrl+Shift+F` does the same, except that a flame graph will
retain stacks with matching frames only. All other stacks will be filtered out.
![](/.assets/images/heatmap6.png)
## Producing heatmaps
Heatmaps can only be generated from recordings in JFR format.
Run [`jfrconv`](ConverterUsage.md) tool with `-o heatmap` option.
Standard `jfrconv` options (`--cpu`, `--alloc`, `--from`/`--to`, `--simple`, etc.)
are also applicable to heatmaps.
Example:
```
jfrconv --cpu -o heatmap profiler.jfr heatmap-cpu.html
```

View File

@@ -0,0 +1,65 @@
# Integrating async-profiler
## Launching as an agent
If you need to profile some code as soon as the JVM starts up, instead of using `asprof`,
it is possible to attach async-profiler as an agent on the command line. For example:
```
$ java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,file=profile.html ...
```
On macOS, the library name is `libasyncProfiler.dylib` instead of `libasyncProfiler.so`.
Agent library is configured through the JVMTI argument interface.
The argument string is a comma-separated list of [profiler options](ProfilerOptions.md):
```
option[=value],option[=value]...
```
`asprof` internally converts command line arguments to the above format and attaches
`libasyncProfiler.so` agent to a running process.
Another important use of attaching async-profiler as an agent is for continuous profiling.
## Using Java API
async-profiler can be controlled programmatically using Java API. The corresponding Java library
is published to Maven Central. You can [include it](https://mvnrepository.com/artifact/tools.profiler/async-profiler/latest)
just like any other Maven dependency:
```
<dependency>
<groupId>tools.profiler</groupId>
<artifactId>async-profiler</artifactId>
<version>X.Y</version>
</dependency>
```
### Example usage with the API
```
AsyncProfiler profiler = AsyncProfiler.getInstance();
```
The above gives us an instance of `AsyncProfiler` object which can be further used to start
actual profiling.
```
profiler.execute("start,jfr,event=cpu,file=/path/to/%p.jfr");
// do some meaningful work
profiler.execute("stop");
```
`%p` equates to the PID of the process. Filename may include other placeholders which
can be found in [Profiler Options](ProfilerOptions.md).
`file` should be specified only once, either in
`start` command with `jfr` output or in `stop` command with any other format.
## IntelliJ IDEA
IntelliJ IDEA comes bundled with async-profiler, which can be further configured to our needs
by selecting the `Java Profiler` menu option at `Settings/Preferences > Build, Execution, Deployment`.
Agent options can be modified for the specific use cases and also `Collect native calls` can be checked
to monitor non-java threads and native frames in Java stack traces.

41
docs/JfrVisualization.md Normal file
View File

@@ -0,0 +1,41 @@
# JFR Visualization
JFR recordings produced by async-profiler can be viewed using multiple options explained below.
## Built-in converter
async-profiler provides a built-in converter `jfrconv` which can be used to convert `jfr` output
to a flame graph or one of the other supported formats. More details on the built-in converter usage
can be found [here](ConverterUsage.md).
## JMC
[JDK Mission Control](https://www.oracle.com/java/technologies/jdk-mission-control.html) (JMC)
is a popular GUI tool to analyze JFR recordings.
It has been originally developed to work in conjunction with the JDK Flight Recorder,
however, async-profiler recordings are also fully compatible with JMC.
When viewing async-profiler recordings in JMC, information on some tabs may be missing.
Developers are typically interested in the following sections:
- Java Application
- Method Profiling
- Memory
- Lock Instances
- JVM Internals
- TLAB Allocations
## IntelliJ IDEA
IntelliJ IDEA Ultimate has a built-in JFR viewer that works perfectly with async-profiler recordings.
For the Community Edition, there is an open-source profiler [plugin](https://plugins.jetbrains.com/plugin/20937-java-jfr-profiler)
that allows you to profile Java applications with JFR and async-profiler as well as
open JFR files obtained outside IDE.
## JFR command line tool
JDK distributions include the `jfr` command line utility to filter, summarize and output
flight recording files into human-readable format. The
[official documentation](https://docs.oracle.com/en/java/javase/21/docs/specs/man/jfr.html)
provides complete information on how to manipulate the contents and translate it as per
developers' needs to debug performance issues with their Java applications.

63
docs/OutputFormats.md Normal file
View File

@@ -0,0 +1,63 @@
# Output Formats
async-profiler currently supports the following output formats:
- `collapsed` - This is a collection of call stacks, where each line is a semicolon separated list of frames followed
by a counter. This is used by the FlameGraph script to generate the FlameGraph visualization of the profile data.
```
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult 21
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult;java/io/DataOutputStream.writeInt 1
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult;java/io/DataOutputStream.writeInt;java/io/ByteArrayOutputStream.write 5
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult;java/io/DataOutputStream.writeUTF;java/io/DataOutputStream.writeUTF 12
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult;java/io/DataOutputStream.writeUTF;java/io/DataOutputStream.writeUTF;java/lang/String.length 3
FileConverter.main;FileConverter.convertFile;FileConverter.saveResult;java/io/DataOutputStream.writeUTF;java/io/DataOutputStream.writeUTF;java/io/DataOutputStream.write 6
start_thread;thread_native_entry;Thread::call_run;VMThread::run;VMThread::inner_execute;VMThread::evaluate_operation;VM_Operation::evaluate;VM_GenCollectForAllocation::doit;GenCollectedHeap::satisfy_failed_allocation;GenCollectedHeap::do_collection;GenCollectedHeap::collect_generation;DefNewGeneration::collect;DefNewGeneration::FastEvacuateFollowersClosure::do_void 12
start_thread;thread_native_entry;Thread::call_run;VMThread::run;VMThread::inner_execute;VMThread::evaluate_operation;VM_Operation::evaluate;VM_GenCollectForAllocation::doit;GenCollectedHeap::satisfy_failed_allocation;GenCollectedHeap::do_collection;GenCollectedHeap::collect_generation;DefNewGeneration::collect;DefNewGeneration::FastEvacuateFollowersClosure::do_void;void ContiguousSpace::oop_since_save_marks_iterate<DefNewScanClosure> 1
```
- `flamegraph` - FlameGraph is a hierarchical representation of call traces of the profiled software in a color coded
format. Read more on the [interpretation](FlamegraphInterpretation.md) of FlameGraphs.
[![FlameGraph](/.assets/images/flamegraph.png)](https://htmlpreview.github.io/?https://github.com/async-profiler/async-profiler/blob/master/.assets/html/flamegraph.html)
- `tree` - Profile output generated in HTML format showing a tree view of resource usage beginning with the call stack
with the highest resource usage and then showing other call stacks in descending order of resource usage. Expanding a
parent frame follows the same hierarchical representation within that frame.
![Tree](/.assets/images/treeview_example.png)
- `text` - If no output format is specified with `-o` and filename has no extension provided, profiled output is
generated in text format.
```
--- Execution profile ---
Total samples : 733
--- 8208 bytes (19.58%), 1 sample
[ 0] byte[]
[ 1] java.util.jar.Manifest$FastInputStream.<init>
[ 2] java.util.jar.Manifest$FastInputStream.<init>
[ 3] java.util.jar.Manifest.read
[ 4] java.util.jar.Manifest.<init>
[ 5] java.util.jar.Manifest.<init>
[ 6] java.util.jar.JarFile.getManifestFromReference
[ 7] java.util.jar.JarFile.getManifest
[ 8] jdk.internal.loader.URLClassPath$JarLoader$2.getManifest
[ 9] jdk.internal.loader.BuiltinClassLoader.defineClass
[10] jdk.internal.loader.BuiltinClassLoader.findClassOnClassPathOrNull
[11] jdk.internal.loader.BuiltinClassLoader.loadClassOrNull
[12] jdk.internal.loader.BuiltinClassLoader.loadClass
[13] jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass
[14] java.lang.ClassLoader.loadClass
[15] java.lang.Class.forName0
[16] java.lang.Class.forName
[17] sun.launcher.LauncherHelper.loadMainClass
[18] sun.launcher.LauncherHelper.checkAndLoadMain
```
- `jfr` - profile format used by the JDK Flight Recorder. The `jfr` format collects data
about the JVM as well as the Java application running on it. async-profiler can generate output in `jfr` format
compatible with tools capable of viewing and analyzing `jfr` files. JDK Mission Control (JMC) and IntelliJ IDEA are
some of many options to visualize `jfr` files. More details [here](JfrVisualization.md).
- `otlp` - OpenTelemetry protocol format for [profiling data](https://opentelemetry.io/blog/2024/profiling).
Experimental feature: backward-incompatible changes may happen in future releases of async-profiler.

130
docs/ProfilerOptions.md Normal file
View File

@@ -0,0 +1,130 @@
# Profiler options
The below tables list the profiler options available with `asprof` and also when
[launching as an agent](IntegratingAsyncProfiler.md#launching-as-an-agent).
Some tables are output specific, which means some options are applicable to only one or more output formats but not all.
```
Usage: asprof [action] [options] [PID]
```
## Actions
The below options are `action`s for async-profiler and common for both `asprof` binary and when launching as an agent.
| Option | Description |
| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `start` | Start profiling in semi-automatic mode, i.e. profiler will run until `stop` command is explicitly called. |
| `resume` | Start or resume earlier profiling session that has been stopped. All the collected data remains valid. The profiling options are not preserved between sessions, and should be specified again. |
| `stop` | Stop profiling and print the report. |
| `dump` | Dump collected data without stopping profiling session. |
| `status` | Print profiling status: whether profiler is active and for how long. |
| `metrics` | Print profiler metrics in Prometheus format. |
| `list` | Show the list of profiling events available for the target process specified with PID. |
## General options
| asprof | Launch as agent | Description |
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
| `-f FILENAME` | `file=FILENAME` | The file name to dump the profile information to.<br>`%p` in the file name is expanded to the PID of the target JVM;<br>`%t` - to the timestamp;<br>`%n{MAX}` - to the sequence number;<br>`%{ENV}` - to the value of the given environment variable.<br>Example: `asprof -o collapsed -f /tmp/traces-%t.txt 8983` |
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>` |
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
| `--loop TIME` | `loop=TIME` | Run profiler in a loop (continuous profiling). The argument is either a clock time (`hh:mm:ss`) or a loop duration in `s`econds, `m`inutes, `h`ours, or `d`ays. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration.<br>Example: `asprof --loop 1h -f /var/log/profile-%t.jfr 8983` |
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Profiling Modes](ProfilingModes.md) for additional information. |
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time in nanoseconds. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events. Time intervals may be followed by `s` for seconds, `ms` for milliseconds, `us` for microseconds or `ns` for nanoseconds.<br>Example: `asprof -e cpu -i 5ms 8983` |
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
| `--tlab` | `tlab` | Use TLAB events for allocation profiling |
| `--live` | `live` | Retain allocation samples with live objects only (objects that have not been collected by the end of profiling session). Useful for finding Java heap memory leaks. |
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified is the interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
| `--nofree` | `nofree` | Will not record free calls in native memory allocation profiling. This is relevant when tracking memory leaks is not important and there are lots of free calls. |
| `--trace METHOD[:T]` | `trace=METHOD[:T]` | Java method to be traced, optionally followed by a latency threshold.<br>Example: `--trace my.pkg.Class.Method:50ms`.<br>Latency threshold defaults to 0 (all calls are profiled). Can be used multiple times. |
| `--lock TIME` | `lock=TIME` | In lock profiling mode, sample contended locks whenever total lock wait time overflows the specified threshold. |
| `--nativelock TIME` | `nativelock=TIME` | In native lock profiling mode, sample contended pthread locks (mutex/rwlock) whenever total lock wait time overflows the specified threshold. |
| `--wall INTERVAL` | `wall=INTERVAL` | Wall clock profiling interval. Use this option instead of `-e wall` to enable wall clock profiling with another event, typically `cpu`.<br>Example: `asprof -e cpu --wall 100ms -f combined.jfr 8983`. |
| `--nobatch` | `nobatch` | Disable wall clock profiling optimization. Async-profiler will emit one `jdk.ExecutionSample` event for each wall clock sample instead of batching them in a custom `profiler.WallClockSample` event. |
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983`<br>The argument may include two numbers separated by `/` (e.g. `200/40`). In this case, stack traces deeper than 200 frames will be truncated to the top 40 frames. This can be useful to prevent a deep recursion from bloating the profile. |
| `-F features` | `features=LIST` | Comma separated (or `+` separated when launching as an agent) list of stack walking features. Supported features are:<ul><li>`stats` - log stack walking performance stats.</li><li>`vtable` - display targets of megamorphic virtual calls as an extra frame on top of `vtable stub` or `itable stub`.</li><li>`comptask` - display current compilation task (a Java method being compiled) in a JIT compiler stack trace.</li><li>`pcaddr` - display instruction addresses.</li></ul>More details [here](AdvancedStacktraceFeatures.md). |
| `-L level` | `loglevel=level` | Log level: `debug`, `info`, `warn`, `error` or `none`. |
| N/A | `log=FILENAME` | Dedicated file for log messages. Used internally by asprof. |
| N/A | `quiet` | Do not log "Profiling started/stopped" message. Used internally by asprof. |
| N/A | `server=ADDRESS` | Start insecure HTTP server with the given IP address/port to control the profiler. This option can be specified as `-agentpath` argument only. Be careful not to expose async-profiler server in a public network. |
| `--all-user` | `alluser` | Include only user-mode events. This option is helpful when kernel profiling is restricted by `perf_event_paranoid` settings. |
| `--sched` | `sched` | Group threads by Linux-specific scheduling policy: BATCH/IDLE/OTHER. |
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
| `--signal NUM` | `signal=NUM` | Use alternative signal for cpu or wall clock profiling. To change both signals, specify two numbers separated by a slash: `--signal SIGCPU/SIGWALL`. |
| `--clock SOURCE` | `clock=SOURCE` | Clock source for JFR timestamps: `tsc` (default) or `monotonic` (equivalent to `CLOCK_MONOTONIC`). |
| `--begin function` | `begin=FUNCTION` | Automatically start profiling when the specified native function is executed. |
| `--end function` | `end=FUNCTION` | Automatically stop profiling when the specified native function is executed. |
| `--ttsp` | `ttsp` | Time-to-safepoint profiling. An alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`.<br>It is not a separate event type, but rather a constraint. Whatever event type you choose (e.g. `cpu` or `wall`), the profiler will work as usual, except that only events between the safepoint request and the start of the VM operation will be recorded. |
| `--nostop` | `nostop` | Record profiling window between `--begin` and `--end`, but do not stop profiling outside window. |
| `--memlimit SIZE` | `memlimit=SIZE` | Limit memory used by the call trace storage. Once the limit is exceeded, no new stack traces will be recorded. The lowest possible limit is 10 MB; the default is unlimited.<br>Example: `asprof -e cpu --memlimit 128m` |
| `--libpath PATH` | N/A | Full path to `libasyncProfiler.so` (useful when profiling a container from the host). |
| `--filter FILTER` | `filter=FILTER` | In the wall-clock profiling mode, profile only threads with the specified ids.<br>Example: `asprof -e wall -d 30 --filter 120-127,132,134 Computey` |
| `--fdtransfer` | `fdtransfer` | Run a background process that provides access to perf_events to an unprivileged process. `--fdtransfer` is useful for profiling a process in a container (which lacks access to perf_events) from the host.<br>See [Profiling Java in a container](ProfilingInContainer.md). |
| `--target-cpu` | `target-cpu` | In perf_events profiling mode, instruct the profiler to only sample threads running on the specified CPU, defaults to -1.<br>Example: `asprof --target-cpu 3`. |
| `--record-cpu` | `record-cpu` | In perf_events profiling mode, instruct the profiler to capture which CPU a sample was taken on. |
| `-v --version` | `version` | Prints the version of profiler library. If PID is specified, gets the version of the library loaded into the given process. |
## Options applicable to JFR output only
| asprof | Launch as agent | Description |
| ------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--chunksize N` | `chunksize=N` | Approximate size for a single JFR chunk. A new chunk will be started whenever specified size is reached. The default `chunksize` is 100MB.<br>Example: `asprof -f profile.jfr --chunksize 100m 8983` |
| `--chunktime N` | `chunktime=N` | Approximate time limit for a single JFR chunk. A new chunk will be started whenever specified time limit is reached. The default `chunktime` is 1 hour.<br>Example: `asprof -f profile.jfr --chunktime 1h 8983` |
| `--jfropts OPTIONS` | `jfropts=OPTIONS` | Comma separated list of JFR recording options. Currently, the only available option is `mem` supported on Linux 3.17+. `mem` enables accumulating events in memory instead of flushing synchronously to a file. |
| `--jfrsync CONFIG` | `jfrsync[=CONFIG]` | Start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies `-o jfr`.<br>`CONFIG` is a predefined JFR profile or a JFR configuration file (.jfc) or a list of JFR events started with `+`.<br>Example: `asprof -e cpu --jfrsync profile -f combined.jfr 8983` |
| `--proc INTERVAL` | `proc=INTERVAL` | Collect statistics about other processes in the system. Default sampling interval is 30s. |
| `--all` | `all` | Shorthand for enabling `cpu`, `wall`, `alloc`, `live`, `lock`, `nativelock`, `nativemem`, and `proc` profiling simultaneously. This can be combined with `--alloc 2m --lock 10ms` etc. to pass custom interval/threshold. It is also possible to combine it with `-e` argument to change the type of event being collected (default is `cpu`). This is not recommended for production, especially for continuous profiling. |
## Options applicable to FlameGraph and Tree view outputs only
| asprof | Launch as agent | Description |
| -------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--title TITLE` | `title=TITLE` | Custom title of a FlameGraph.<br>Example: `asprof -f profile.html --title "Sample CPU profile" 8983` |
| `--minwidth PERCENT` | `minwidth=PERCENT` | Minimum frame width as a percentage. Smaller frames will not be visible.<br>Example: `asprof -f profile.html --minwidth 0.5 8983` |
| `--reverse` | `reverse` | Reverse stack traces (defaults to icicle graph).<br>Example: `asprof -f profile.html --reverse 8983` |
| `--inverted` | `inverted` | Toggles the layout for reversed stacktraces from icicle to flamegraph and for default stacktraces from flamegraph to icicle.<br>Example: `asprof -f profile.html --inverted 8983` |
Notice that `--reverse` and `--inverted` are orthogonal settings. By default, flamegraphs grow from bottom to top (because flames grow from bottom to top). The outermost frames (e.g. the `main()` function) are shown at the bottom while the innermost, leaf frames are shown at the top. If such a flame graph is mirrored on the y-axis, it becomes an icicle graph (icicles grow top-down). The default setting for this layout can be toggled with the `--inverted` option when the graph is created or changed later with the `Invert` button which is located in the upper-left corner of the generated HTML page, when the graph is displayed.
By default, async-profiler merges stack traces starting from the outermost (e.g. `main()`) frames and displays them from bottom to top in a flamegraph. The `--reverse` option can be used to create reverse stack traces, i.e. merge them starting with the innermost, leaf frames. By default, reversed stack traces are displayed from top to bottom as icicle graphs. The default layout setting for both, normal and reversed stack traces can be changed with the `--inverted` option.
## Options applicable to any output format except JFR
| asprof | Launch as agent | Description |
| -------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `-t --threads` | `threads` | Profile threads separately. Each stack trace will end with a frame that denotes a single thread.<br>Example: `asprof -t 8983` |
| `-s --simple` | `simple` | Print simple class names instead of fully qualified names. |
| `-n --norm` | `norm` | Normalize names of hidden classes / lambdas. |
| `-g --sig` | `sig` | Print method signatures. |
| `-a --ann` | `ann` | Annotate JIT compiled methods with `_[j]`, inlined methods with `_[i]`, interpreted methods with `_[0]` and C1 compiled methods with `_[1]`. FlameGraph and Tree view will color frames depending on their type regardless of this option. |
| `-l --lib` | `lib` | Prepend library names to symbols, e.g. ``libjvm.so`JVM_DefineClassWithSource``. |
| `--dot` | `dot` | Dotted class names, e.g. `java.lang.String` instead of `java/lang/String`. |
| `--samples` | `samples` | Count the number of samples. This is the default aggregation option. |
| `--total` | `total` | Count the total value of the collected metric instead of the number of samples, e.g. total allocation size. |
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
| `-X PATTERN` | `exclude=PATTERN` | Filter stack traces by the given pattern(s). `-X` defines the name pattern that _must not_ occur in any of the stack traces in the output. `-X` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -X '*Unsafe.park*' 8983` |
| N/A | `mcache[=AGE]` | Maximum age of the method name cache. Default is `0` (do not cache method names between profiling sessions). |
## Appendix
### Dump Option
`-o fmt` - specifies what information to dump when profiling ends.
`fmt` can be one of the following options:
- `traces[=N]` - dump call traces (at most N samples);
- `flat[=N]` - dump flat profile (top N hot methods);
- can be combined with `traces`, e.g. `traces=200,flat=200`
- `jfr` - dump events in JDK Flight Recorder format readable by JDK Mission Control.
- `collapsed` - dump collapsed call traces in the format used by
[FlameGraph](https://github.com/brendangregg/FlameGraph) script. This is
a collection of call stacks, where each line is a semicolon separated list
of frames followed by a counter.
- `flamegraph` - produce Flame Graph in HTML format.
- `tree` - produce Call Tree in HTML format.
- `--reverse` option will generate backtrace view.
- `otlp` - dump events in OpenTelemetry format.
It is possible to specify multiple dump options at the same time.

View File

@@ -0,0 +1,24 @@
# Profiling Java in a container
async-profiler provides the ability to profile Java processes running in a Docker or LXC
container both from within a container and from the host system.
When profiling from the host, `pid` should be the Java process ID in the host
namespace. Use `ps aux | grep java` or `docker top <container>` to find
the process ID.
async-profiler should be run from the host by a privileged user - it will
automatically switch to the proper pid/mount namespace and change
user credentials to match the target process. Also make sure that
the target container can access `libasyncProfiler.so` by the same
absolute path as on the host. Alternatively, specify `--libpath` option
to override path to `libasyncProfiler.so` in a container.
By default, Docker container restricts the access to `perf_event_open`
syscall. There are 3 alternatives to allow profiling in a container:
1. You can modify the [seccomp profile](https://docs.docker.com/engine/security/seccomp/)
or disable it altogether with `--security-opt seccomp=unconfined` option. In
addition, `--cap-add SYS_ADMIN` may be required.
2. You can use "fdtransfer": see the help for `--fdtransfer`.
3. Last, you may fall back to `-e ctimer` profiling mode, see [Troubleshooting](Troubleshooting.md).

347
docs/ProfilingModes.md Normal file
View File

@@ -0,0 +1,347 @@
# Profiling modes
Besides CPU time, async-profiler provides various other profiling modes such as `Allocation`, `Wall Clock`, `Java Method`
and even a `Multiple Events` profiling mode.
## CPU profiling
In this mode, profiler collects stack trace samples that include **Java** methods,
**native** calls, **JVM** code and **kernel** functions.
The general approach is receiving call stacks generated by `perf_events`
and matching them up with call stacks generated by `AsyncGetCallTrace`,
in order to produce an accurate profile of both Java and native code.
Additionally, async-profiler provides a workaround to recover stack traces
in some [corner cases](https://bugs.openjdk.java.net/browse/JDK-8178287)
where `AsyncGetCallTrace` fails.
This approach has the following advantages compared to using `perf_events`
directly with a Java agent that translates addresses to Java method names:
- Does not require `-XX:+PreserveFramePointer`, which introduces
performance overhead that can be sometimes as high as 10%.
- Does not require starting JVM with an agent for translating Java code addresses
to method names.
- Displays interpreter frames.
- Does not produce large intermediate files (perf.data) for further processing in
user space scripts.
If you wish to resolve frames within `libjvm`, the [debug symbols](#installing-debug-symbols) are required.
## Allocation profiling
The profiler can be configured to collect call sites where the largest amount
of heap memory is allocated.
async-profiler does not use intrusive techniques like bytecode instrumentation
or expensive DTrace probes which have significant performance impact.
It also does not affect Escape Analysis or prevent JIT optimizations
like allocation elimination. Only actual heap allocations are measured.
The profiler features TLAB-driven sampling. It relies on HotSpot-specific
callbacks to receive two kinds of notifications:
- when an object is allocated in a newly created TLAB;
- when an object is allocated on a slow path outside TLAB.
Sampling interval can be adjusted with `--alloc` option.
For example, `--alloc 500k` will take one sample after 500 KB of allocated
space on average. Prior to JDK 11, intervals less than TLAB size will not take effect.
In allocation profiling mode, the top frame of every call trace is the class
of the allocated object, and the counter is the heap pressure (the total size
of allocated TLABs or objects outside TLAB).
### Installing Debug Symbols
Prior to JDK 11, the allocation profiler required HotSpot debug symbols.
Some OpenJDK distributions (Amazon Corretto, Liberica JDK, Azul Zulu)
already have them embedded in `libjvm.so`, other OpenJDK builds typically
provide debug symbols in a separate package. For example, to install
OpenJDK debug symbols on Debian / Ubuntu, run:
```
# apt install openjdk-17-dbg
```
(replace `17` with the desired version of JDK).
On CentOS, RHEL and some other RPM-based distributions, this could be done with
[debuginfo-install](http://man7.org/linux/man-pages/man1/debuginfo-install.1.html) utility:
```
# debuginfo-install java-1.8.0-openjdk
```
On Gentoo, the `icedtea` OpenJDK package can be built with the per-package setting
`FEATURES="nostrip"` to retain symbols.
The `gdb` tool can be used to verify if debug symbols are properly installed for the `libjvm` library.
For example, on Linux:
```
$ gdb $JAVA_HOME/lib/server/libjvm.so -ex 'info address UseG1GC'
```
This command's output will either contain `Symbol "UseG1GC" is at 0xxxxx`
or `No symbol "UseG1GC" in current context`.
## Native memory leaks
The profiling mode `nativemem` records `malloc`, `realloc`, `calloc` and `free` calls
with the addresses, so that allocations can be matched with frees. This helps to focus
the profile report only on unfreed allocations, which are likely to be the source of a memory leak.
Example:
```
asprof start -e nativemem -f app.jfr <YourApp>
# or
asprof start --nativemem N -f app.jfr <YourApp>
# or if only allocation calls are interesting, do not collect free calls:
asprof start --nativemem N --nofree -f app.jfr <YourApp>
asprof stop <YourApp>
```
Now we need to process the jfr file, to find native memory leaks:
```
# --total for bytes, default counts invocations.
jfrconv --total --nativemem --leak app.jfr app-leak.html
# No leak analysis, include all native allocations:
jfrconv --total --nativemem app.jfr app-malloc.html
```
When `--leak` option is used, the generated flame graph will show allocations without matching `free` calls.
![nativemem flamegraph](../.assets/images/nativemem_flamegraph.png)
To avoid bias towards youngest allocations not freed by the end of the profiling session,
leak profiler ignores tail allocations made in the last 10% of the profiling period.
Tail length can be altered with `--tail` option that accepts `ratio` or `percent%` as an argument.
For example, to ignore allocations in the last 2 minutes of a 10-minute profile, use
```
jfrconv --nativemem --leak --tail 20% app.jfr app-leak.html
```
The overhead of `nativemem` profiling depends on the number of native allocations,
but is usually small enough even for production use. If required, the overhead can be reduced
by configuring the profiling interval. E.g. if you add `nativemem=1m` profiler option,
allocation samples will be limited to at most one sample per allocated megabyte.
### Using LD_PRELOAD for finding native memory leaks
Similar to Java applications, `nativemem` mode can also be used with [non-Java processes](ProfilingNonJavaApplications.md).
Run an application with `nativemem` profiler that dumps recordings in JFR format every 10 minutes:
```
LD_PRELOAD=/path/to/libasyncProfiler.so ASPROF_COMMAND=start,nativemem,total,loop=10m,cstack=dwarf,file=profile-%t.jfr NativeApp [args]
```
Then run `jfrconv` to generate memory leak report as a flame graph:
```
jfrconv --total --nativemem --leak <profile>.jfr <profile>-leak.html
```
## Wall-clock profiling
`-e wall` option tells async-profiler to sample all threads equally every given
period of time regardless of thread status: Running, Sleeping or Blocked.
For instance, this can be helpful when profiling application start-up time.
Wall-clock profiler is most useful in per-thread mode: `-t`.
Example: `asprof -e wall -t -i 50ms -f result.html 8983`
## Lock profiling
`-e lock` option tells async-profiler to measure lock contention in the profiled application. Lock profiling can help
developers understand lock acquisition patterns, lock contention (when threads have to wait to acquire locks), time
spent waiting for locks and which code paths are blocked due to locks.
In lock profiling mode, the top frame is the class of lock/monitor, and the counter is the number of nanoseconds it took to
enter this lock/monitor.
Example: `asprof -e lock -t -i 5ms -f result.html 8983`
## Native lock profiling
`--nativelock` option tells async-profiler to measure pthread lock contention in the profiled application.
Native lock profiling can help developers understand pthread lock acquisition patterns, lock contention (when threads
have to wait to acquire native locks), time spent waiting for pthread mutexes and read-write locks, and which code paths
are blocked due to native synchronization primitives.
Native lock profiling works by intercepting calls to:
- [`pthread_mutex_lock`](https://man7.org/linux/man-pages/man3/pthread_mutex_lock.3p.html)
- [`pthread_rwlock_rdlock`](https://man7.org/linux/man-pages/man3/pthread_rwlock_rdlock.3p.html)
- [`pthread_rwlock_wrlock`](https://man7.org/linux/man-pages/man3/pthread_rwlock_wrlock.3p.html)
In this mode, the top frame shows the native function that experienced contention (e.g., pthread_mutex_lock_hook),
and the counter represents the number of nanoseconds threads spent waiting to acquire the lock.
Key differences from Java lock profiling:
- Profiles native pthread locks instead of Java monitors.
- Works with C/C++ applications and native libraries used by Java applications.
- Captures contention in native code paths that Java lock profiling cannot see.
Example: `asprof --nativelock 5ms -t -f result.html 8983`
## Java method profiling
`-e ClassName.methodName` option instruments the given Java method
in order to record all invocations of this method with the stack traces.
Example: `-e java.util.Properties.getProperty` will profile all places
where `getProperty` method is called from.
Only non-native Java methods are supported. To profile a native method,
use hardware breakpoint event instead, e.g. `-e Java_java_lang_Throwable_fillInStackTrace`
**Be aware** that if you attach async-profiler at runtime, the first instrumentation
of a non-native Java method may cause the [deoptimization](https://github.com/openjdk/jdk/blob/bf2e9ee9d321ed289466b2410f12ad10504d01a2/src/hotspot/share/prims/jvmtiRedefineClasses.cpp#L4092-L4096)
of all compiled methods. The subsequent instrumentation flushes only the _dependent code_.
The massive CodeCache flush doesn't occur if attaching async-profiler as an agent.
### Latency profiling
Please refer to our blog post on [latency profiling](https://github.com/async-profiler/async-profiler/discussions/1497)
to know more about this profiling mode.
## Native function profiling
Here are some useful native functions to profile:
- `G1CollectedHeap::humongous_obj_allocate` - trace _humongous allocations_ of the G1 GC,
- `JVM_StartThread` - trace creation of new Java threads,
- `Java_java_lang_ClassLoader_defineClass1` - trace class loading.
## Multiple events
It is possible to profile CPU, allocations, and locks at the same time.
Instead of CPU, you may choose any other execution event: wall-clock,
perf event, tracepoint, Java method, etc.
The only output format that supports multiple events together is JFR.
The recording will contain the following event types:
- `jdk.ExecutionSample`
- `jdk.ObjectAllocationInNewTLAB` (alloc)
- `jdk.ObjectAllocationOutsideTLAB` (alloc)
- `jdk.JavaMonitorEnter` (lock)
- `jdk.ThreadPark` (lock)
To start profiling cpu + allocations + locks together, specify
```
asprof -e cpu,alloc,lock -f profile.jfr ...
```
or use `--alloc` and `--lock` parameters with the desired threshold:
```
asprof -e cpu --alloc 2m --lock 10ms -f profile.jfr ...
```
The same, when starting profiler as an agent:
```
-agentpath:/path/to/libasyncProfiler.so=start,event=cpu,alloc=2m,lock=10ms,file=profile.jfr
```
### Multi-event profiling using `--all`
The `--all` flag offers a way to simultaneously enable a predefined collection of common profiling events. By default, `--all` activates profiling for `cpu`, `wall`, `alloc`, `live`, `lock` and `nativemem`.
**Important consideration**
While the `--all` flag can be useful for development environments to get a wide overview, it is not recommended to enable this in production, especially for continuous profiling. Users are invited to select carefully what to profile and with what settings.
**Sample command:**
This command enables the default set of events included in `--all`:
```
asprof --all -f profile.jfr
```
or combine it with `--alloc`/`--wall`/`--lock`/`--nativemem` options to override individual settings. For example:
```
asprof --all --alloc 2m --lock 10ms -f profile.jfr
```
The same, when starting profiler as an agent:
```
-agentpath:/path/to/libasyncProfiler.so=start,all,alloc=2m,lock=10ms,file=profile.jfr
```
Instead of `cpu`, it is possible to override the `--all` parameter with any other event type of your choice. For instance, the following command will profile `cycles` along with `wall`, `alloc`, `live`, `lock` and `nativemem`:
```
asprof --all -e cycles -f profile.jfr
```
## Continuous profiling
Continuous profiling is a means by which an application can be profiled
continuously and dump profile results every specified time period.
It is a very effective technique in finding performance degradations proactively
and efficiently. Continuous profiling helps users to understand performance
differences between versions of the same application. Recent outputs can
be compared with continuous profiling output history to find differences
and optimize the changes introduced in case of performance degradations.
async-profiler provides the ability to continuously profile an application with
the `loop` option. Make sure the filename includes a timestamp pattern, or the
output will be overwritten on each iteration.
```
asprof --loop 1h -f /var/log/profile-%t.jfr 8983
```
## perf event types supported on Linux
| Usage | Description |
| ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Predefined: | |
| `-e cpu-clock` | High-resolution per-CPU timer. Similar to `-e cpu` but forces using perf_events. |
| `-e page-faults` | Software page faults |
| `-e context-switches` | Context switches |
| `-e cycles` | Total CPU cycles |
| `-e ref-cycles` | CPU reference cycles, not affected by CPU frequency scaling |
| `-e instructions` | Retired CPU instructions |
| `-e cache-references` | Cache accesses (usually Last Level Cache, but may depend on the architecture) |
| `-e cache-misses` | Cache accesses requiring fetching data from a higher-level cache or main memory |
| `-e branch-instructions` | Retired branch instructions |
| `-e branch-misses` | Mispredicted branch instructions |
| `-e bus-cycles` | Bus cycles |
| `-e L1-dcache-load-misses` | Cache misses on Level 1 Data Cache |
| `-e LLC-load-misses` | Cache misses on the Last Level Cache |
| `-e dTLB-load-misses` | Data load misses on the Translation Lookaside Buffer |
| Breakpoint: | |
| `-e mem:<addr>` | Breakpoint on a decimal or hex (0x) address |
| `-e mem:<func>` | Breakpoint on a public or a private symbol |
| `-e mem:<func>[+<offset>][/<len>][:rwx]`  | Breakpoint on a symbol or an address with offset, length and read/write/exec. Address, offset and length can be hex or dec. The format of `mem` event is the same as in [`perf-record`](https://man7.org/linux/man-pages/man1/perf-record.1.html). |
| `-e <symbol>` | Equivalent to an execution breakpoint on a symbol: `mem:<symbol>:x`. Example: `-e strcmp` will trace all calls of native `strcmp` function. |
| Tracepoint: | |
| `-e trace:<id>` | Kernel tracepoint with the given numeric id |
| `-e <tracepoint>` | Kernel tracepoint with the specified name. Example: `-e syscalls:sys_enter_open` will trace all `open` syscalls. |
| Probes: | |
| `-e kprobe:<func>[+<offset>]` | Kernel probe. Example: `-e kprobe:do_sys_open`. |
| `-e kretprobe:<func>[+<offset>]` | Kernel return probe. Example: `-e kretprobe:do_sys_open`. |
| `-e uprobe:<func>[+<offset>]` | Userspace probe. Example: `-e uprobe:/usr/lib64/libc-2.17.so+0x114790`. |
| `-e uretprobe:<func>[+<offset>]` | Userspace return probe |
| PMU: | |
| `-e r<NNN>` | Architecture-specific PMU event with the given number. Example: `-e r4d2` selects `MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM` event, which corresponds to event 0xd2, umask 0x4. |
| `-e <pmu descriptor>` | PMU event descriptor. Example: `-e cpu/cache-misses/`, `-e cpu/event=0xd2,umask=4/`. The same syntax can be used for uncore and vendor-specific events, e.g. `amd_l3/event=0x01,umask=0x80/` |

View File

@@ -0,0 +1,95 @@
# Profiling Non-Java applications
The scope of profiling non-Java applications is limited to the case when profiler is controlled
programmatically from the process being profiled or with `LD_PRELOAD`. It is worth noting that
[dynamic attach](IntegratingAsyncProfiler.md#launching-as-an-agent)
which is available for Java is not supported for non-Java profiling.
## LD_PRELOAD
async-profiler can be injected into a native application through the `LD_PRELOAD` mechanism:
```
LD_PRELOAD=/path/to/libasyncProfiler.so ASPROF_COMMAND=start,event=cpu,file=profile.jfr NativeApp [args]
```
All basic functionality remains the same. Profiler can run in `cpu`, `wall`, `nativemem` and other perf_events
modes. Flame Graph and JFR output formats are supported, although JFR files will obviously lack
Java-specific events.
See [Profiling Modes](ProfilingModes.md) for more examples.
## Controlling async-profiler via the C API
Similar to the
[Java API](IntegratingAsyncProfiler.md#using-java-api),
there is a C API for using profiler inside a native application.
Header file for the API is bundled in the async-profiler release package under [`include/asprof.h`](../src/asprof.h).
To use it in a C/C++ application, include the mentioned `asprof.h`. Below is an example showing how to invoke async-profiler with the API:
```
#include "asprof.h"
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
void test_output_callback(const char* buffer, size_t size) {
fwrite(buffer, sizeof(char), size, stderr);
}
int main() {
void* lib = dlopen("/path/to/libasyncProfiler.so", RTLD_NOW);
if (lib == NULL) {
printf("%s\n", dlerror());
exit(1);
}
asprof_init_t asprof_init = (asprof_init_t)dlsym(lib, "asprof_init");
asprof_execute_t asprof_execute = (asprof_execute_t)dlsym(lib, "asprof_execute");
asprof_error_str_t asprof_error_str = (asprof_error_str_t)dlsym(lib, "asprof_error_str");
if (asprof_init == NULL || asprof_execute == NULL || asprof_error_str == NULL) {
printf("%s\n", dlerror());
dlclose(lib);
exit(1);
}
asprof_init();
printf("Starting profiler\n");
char cmd[] = "start,event=cpu,loglevel=debug,file=profile.jfr";
asprof_error_t err = asprof_execute(cmd, test_output_callback);
if (err != NULL) {
fprintf(stderr, "%s\n", asprof_error_str(err));
exit(1);
}
// ... some meaningful work ...
printf("Stopping profiler\n");
err = asprof_execute("stop", test_output_callback);
if (err != NULL) {
fprintf(stderr, "%s\n", asprof_error_str(err));
exit(1);
}
return 0;
}
```
## Unstable APIs
These APIs are unstable and might change or be removed in the next version of async-profiler.
### Advanced Sampling
The `asprof_get_thread_local_data` function returns a pointer to async-profiler's
thread-local data structure. The structure is guaranteed to live as long as the thread.
The returned structure contains a pointer that increments every time there is a sample. This gives
native code an easy way to detect when a sample event has occurred, and to log metadata about what the
program was doing when the event happened.

56
docs/StackWalkingModes.md Normal file
View File

@@ -0,0 +1,56 @@
# Stack Walking Modes
## Frame Pointer
`Frame Pointer (FP)` stack walking is a technique for collecting call stacks by tracking frame pointers in memory.
Each function call maintains a pointer to its caller's stack frame, creating a linked chain that can be traversed
to reconstruct the program's execution path. It is very fast and introduces less overhead than other
stack walking methods, but it requires code to be compiled with frame
pointers enabled (`-fno-omit-frame-pointer`).
Before async-profiler 4.2, Frame Pointer was the default stack walking mode.
Since version 4.2, the default was changed to [VM Structs](#vm-structs).
## DWARF
DWARF stack walking is a method to reconstruct call stacks using unwinding information embedded in executables
(typically in `.eh_frame` section). Unlike frame-pointer-based unwinding, it works reliably even with optimized code
where frame pointers are omitted.
DWARF unwinding requires extra memory (e.g. the lookup table for `libjvm.so` is about 2MB).
It is also slower than the traditional FP-based stack walker, but it's still fast enough for on-the-fly unwinding
due to being signal safe in async-profiler.
The feature can be enabled with the option `--cstack dwarf` (or its agent equivalent `cstack=dwarf`).
## VM Structs
async-profiler can leverage JVM internal structures to replicate the logic of Java stack walking
in the profiler itself without depending on the unstable JVM API.
This mode of stack walking has been introduced in async-profiler due to issues with `AsyncGetCallTrace`.
AsyncGetCallTrace (AGCT) is a non-standard extension of HotSpot JVM to obtain Java stack traces outside safepoints.
async-profiler had been relying on AGCT heavily, and it even got its name after this function.
Being a non-API function, `AsyncGetCallTrace` was never supported well enough in OpenJDK: it did not receive enough testing, and it was
broken several times even in minor JDK updates, e.g. [JDK-8307549](https://bugs.openjdk.org/browse/JDK-8307549).
AsyncGetCallTrace is notorious for its inability to walk Java stack in different corner cases. There is a long-standing
bug [JDK-8178287](https://bugs.openjdk.org/browse/JDK-8178287) with several examples. But the worst aspect is that
AsyncGetCallTrace can crash JVM, and there is no reliable way to get around this outside the JVM.
Due to issues with AGCT from time to time, including random crashes and missing stack traces,
`vm` stack walking mode based on HotSpot VM Structs was introduced in async-profiler.
`vm` stack walker has the following advantages:
- Fully enclosed by the crash protection based on `setjmp`/`longjmp`.
- Can show all frames: Java, native and JVM stubs throughout the whole stack.
- Provides additional information on each frame, like JIT compilation type.
The feature can be enabled with the option `--cstack vm` (or its agent equivalent `cstack=vm`).
Since async-profiler 4.2, this is the default mode when running on the HotSpot JVM.
Another variant of this option: `--cstack vmx` activates an "expert" unwinding based on VM Structs.
With this option, async-profiler collects mixed stack traces that have Java and native frames interleaved.
The maximum stack depth for `vm` or `vmx` stack walking is controlled with `-j depth` option.

133
docs/Troubleshooting.md Normal file
View File

@@ -0,0 +1,133 @@
# Troubleshooting
## Error Messages
### perf_event mmap failed: Operation not permitted
Profiler allocates 8 kB perf_event buffer for each thread of the target process.
The above error may appear if the total size of perf_event buffers (`8 * threads` kB)
exceeds locked memory limit. This limit is comprised of `ulimit -l` plus
the value of `kernel.perf_event_mlock_kb` sysctl multiplied by the number of CPU cores.
For example, on a 16-core machine, `ulimit -l 65536` and `kernel.perf_event_mlock_kb=516`
is enough for profiling `(65536 + 516*16) / 8 = 9224` threads.
If an application has more threads, increase one of the above limits, or native stacks
will not be collected for some threads.
A privileged process is not subject to the locked memory limit.
### Failed to change credentials to match the target process: Operation not permitted
Due to a limitation of the HotSpot Dynamic Attach mechanism, the profiler must be run
by exactly the same user (and group) as the owner of target JVM process.
If profiler is run by a different user, it will try to automatically change
current user and group. This will likely succeed for `root`, but not for
other users, resulting in the above error.
### Could not start attach mechanism: No such file or directory
The profiler cannot establish communication with the target JVM through UNIX domain socket.
Usually this happens in one of the following cases:
1. Attach socket `/tmp/.java_pidNNN` has been deleted. It is a common
practice to clean `/tmp` automatically with some scheduled script.
Configure the cleanup software to exclude `.java_pid*` files from deletion.
- How to check: run `lsof -p PID | grep java_pid`. If it lists a socket file, but the file does not exist, then this is exactly
the described problem.
2. JVM is started with `-XX:+DisableAttachMechanism` option.
3. `/tmp` directory of Java process is not physically the same directory
as `/tmp` of your shell, because Java is running in a container or in
`chroot` environment. `asprof` attempts to solve this automatically,
but it might lack the required permissions to do so.
- Check `strace asprof PID jcmd`
4. JVM is busy and cannot reach a safepoint. For instance,
JVM is in the middle of long-running garbage collection.
- How to check: run `kill -3 PID`. Healthy JVM process should print
a thread dump and heap info in its console.
### Target JVM failed to load libasyncProfiler.so
The connection with the target JVM has been established, but JVM is unable to load profiler shared library.
Make sure the user of JVM process has permissions to access `libasyncProfiler.so` by exactly the same absolute path.
For more information see [#78](https://github.com/async-profiler/async-profiler/issues/78).
### Perf events unavailable
`perf_event_open()` syscall has failed. Typical reasons include:
1. `/proc/sys/kernel/perf_event_paranoid` is set to restricted mode (>=2).
2. seccomp disables `perf_event_open` API in a container.
3. OS runs under a hypervisor that does not virtualize performance counters.
4. perf_event_open API is not supported on this system, e.g. WSL.
<br>For permissions-related reasons (such as 1 and 2), using `--fdtransfer` while running the profiler
as a privileged user may solve the issue.
If changing the configuration is not possible, you may fall back to
`-e ctimer` profiling mode. It is similar to `cpu` mode, but does not
require perf_events support. As a drawback, there will be no kernel
stack traces.
### No AllocTracer symbols found. Are JDK debug symbols installed?
The OpenJDK debug symbols are required for allocation profiling for applications developed
with JDK prior to 11. See [Installing Debug Symbols](ProfilingModes.md#installing-debug-symbols) for more
details. If the error message persists after a successful installation of the debug symbols,
it is possible that the JDK was upgraded when installing the debug symbols.
In this case, profiling any Java process which had started prior to the installation
will continue to display this message, since the process had loaded
the older version of the JDK which lacked debug symbols.
Restarting the affected Java processes should resolve the issue.
### VMStructs unavailable. Unsupported JVM?
JVM shared library does not export `gHotSpotVMStructs*` symbols -
apparently this is not a HotSpot JVM. Sometimes the same message
can be also caused by an incorrectly built JDK
(see [#218](https://github.com/async-profiler/async-profiler/issues/218)).
In these cases installing JDK debug symbols may solve the problem.
### Could not parse symbols from <libname.so>
Async-profiler was unable to parse non-Java function names because of
the corrupted contents in `/proc/[pid]/maps`. The problem is known to
occur in a container when running Ubuntu with Linux kernel 5.x.
This is an OS bug, see <https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1843018>.
### Could not open output file
Output file is written by the target JVM process, not by the profiler script.
Make sure the path specified in `-f` option is correct and is accessible by the JVM.
## Known Limitations
- No Java stacks will be collected if `-XX:MaxJavaStackTraceDepth` is zero
or negative. The exception is `--cstack vm` mode, which does not take
`MaxJavaStackTraceDepth` into account.
- Too short profiling interval may cause continuous interruption of heavy
system calls like `clone()`, so that it will never complete;
see [#97](https://github.com/async-profiler/async-profiler/issues/97).
The workaround is simply to increase the interval.
- When agent is not loaded at JVM startup (by using -agentpath option) it is
highly recommended to use `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` JVM flags.
Without those flags the profiler will still work correctly but results might be
less accurate. For example, without `-XX:+DebugNonSafepoints` there is a high chance
that simple inlined methods will not appear in the profile. When the agent is attached at runtime,
`CompiledMethodLoad` JVMTI event enables debug info, but only for methods compiled after attaching.
- On most Linux systems, `perf_events` captures call stacks with a maximum depth
of 127 frames. On recent Linux kernels, this can be configured using
`sysctl kernel.perf_event_max_stack` or by writing to the
`/proc/sys/kernel/perf_event_max_stack` file.
- You will not see the non-Java frames _preceding_ the Java frames on the
stack, unless `--cstack vmx` is specified.
For example, if `start_thread` called `JavaMain` and then your Java
code started running, you will not see the first two frames in the resulting
stack. On the other hand, you _will_ see non-Java frames (user and kernel)
invoked by your Java code.
- macOS profiling is limited to user space code only.

View File

@@ -1,358 +0,0 @@
COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
1. Definitions.
1.1. "Contributor" means each individual or entity that
creates or contributes to the creation of Modifications.
1.2. "Contributor Version" means the combination of the
Original Software, prior Modifications used by a
Contributor (if any), and the Modifications made by that
particular Contributor.
1.3. "Covered Software" means (a) the Original Software, or
(b) Modifications, or (c) the combination of files
containing Original Software with files containing
Modifications, in each case including portions thereof.
1.4. "Executable" means the Covered Software in any form
other than Source Code.
1.5. "Initial Developer" means the individual or entity
that first makes Original Software available under this
License.
1.6. "Larger Work" means a work which combines Covered
Software or portions thereof with code not governed by the
terms of this License.
1.7. "License" means this document.
1.8. "Licensable" means having the right to grant, to the
maximum extent possible, whether at the time of the initial
grant or subsequently acquired, any and all of the rights
conveyed herein.
1.9. "Modifications" means the Source Code and Executable
form of any of the following:
A. Any file that results from an addition to,
deletion from or modification of the contents of a
file containing Original Software or previous
Modifications;
B. Any new file that contains any part of the
Original Software or previous Modification; or
C. Any new file that is contributed or otherwise made
available under the terms of this License.
1.10. "Original Software" means the Source Code and
Executable form of computer software code that is
originally released under this License.
1.11. "Patent Claims" means any patent claim(s), now owned
or hereafter acquired, including without limitation,
method, process, and apparatus claims, in any patent
Licensable by grantor.
1.12. "Source Code" means (a) the common form of computer
software code in which modifications are made and (b)
associated documentation included in or with such code.
1.13. "You" (or "Your") means an individual or a legal
entity exercising rights under, and complying with all of
the terms of, this License. For legal entities, "You"
includes any entity which controls, is controlled by, or is
under common control with You. For purposes of this
definition, "control" means (a) the power, direct or
indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (b) ownership
of more than fifty percent (50%) of the outstanding shares
or beneficial ownership of such entity.
2. License Grants.
2.1. The Initial Developer Grant.
Conditioned upon Your compliance with Section 3.1 below and
subject to third party intellectual property claims, the
Initial Developer hereby grants You a world-wide,
royalty-free, non-exclusive license:
(a) under intellectual property rights (other than
patent or trademark) Licensable by Initial Developer,
to use, reproduce, modify, display, perform,
sublicense and distribute the Original Software (or
portions thereof), with or without Modifications,
and/or as part of a Larger Work; and
(b) under Patent Claims infringed by the making,
using or selling of Original Software, to make, have
made, use, practice, sell, and offer for sale, and/or
otherwise dispose of the Original Software (or
portions thereof).
(c) The licenses granted in Sections 2.1(a) and (b)
are effective on the date Initial Developer first
distributes or otherwise makes the Original Software
available to a third party under the terms of this
License.
(d) Notwithstanding Section 2.1(b) above, no patent
license is granted: (1) for code that You delete from
the Original Software, or (2) for infringements
caused by: (i) the modification of the Original
Software, or (ii) the combination of the Original
Software with other software or devices.
2.2. Contributor Grant.
Conditioned upon Your compliance with Section 3.1 below and
subject to third party intellectual property claims, each
Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than
patent or trademark) Licensable by Contributor to
use, reproduce, modify, display, perform, sublicense
and distribute the Modifications created by such
Contributor (or portions thereof), either on an
unmodified basis, with other Modifications, as
Covered Software and/or as part of a Larger Work; and
(b) under Patent Claims infringed by the making,
using, or selling of Modifications made by that
Contributor either alone and/or in combination with
its Contributor Version (or portions of such
combination), to make, use, sell, offer for sale,
have made, and/or otherwise dispose of: (1)
Modifications made by that Contributor (or portions
thereof); and (2) the combination of Modifications
made by that Contributor with its Contributor Version
(or portions of such combination).
(c) The licenses granted in Sections 2.2(a) and
2.2(b) are effective on the date Contributor first
distributes or otherwise makes the Modifications
available to a third party.
(d) Notwithstanding Section 2.2(b) above, no patent
license is granted: (1) for any code that Contributor
has deleted from the Contributor Version; (2) for
infringements caused by: (i) third party
modifications of Contributor Version, or (ii) the
combination of Modifications made by that Contributor
with other software (except as part of the
Contributor Version) or other devices; or (3) under
Patent Claims infringed by Covered Software in the
absence of Modifications made by that Contributor.
3. Distribution Obligations.
3.1. Availability of Source Code.
Any Covered Software that You distribute or otherwise make
available in Executable form must also be made available in
Source Code form and that Source Code form must be
distributed only under the terms of this License. You must
include a copy of this License with every copy of the
Source Code form of the Covered Software You distribute or
otherwise make available. You must inform recipients of any
such Covered Software in Executable form as to how they can
obtain such Covered Software in Source Code form in a
reasonable manner on or through a medium customarily used
for software exchange.
3.2. Modifications.
The Modifications that You create or to which You
contribute are governed by the terms of this License. You
represent that You believe Your Modifications are Your
original creation(s) and/or You have sufficient rights to
grant the rights conveyed by this License.
3.3. Required Notices.
You must include a notice in each of Your Modifications
that identifies You as the Contributor of the Modification.
You may not remove or alter any copyright, patent or
trademark notices contained within the Covered Software, or
any notices of licensing or any descriptive text giving
attribution to any Contributor or the Initial Developer.
3.4. Application of Additional Terms.
You may not offer or impose any terms on any Covered
Software in Source Code form that alters or restricts the
applicable version of this License or the recipients'
rights hereunder. You may choose to offer, and to charge a
fee for, warranty, support, indemnity or liability
obligations to one or more recipients of Covered Software.
However, you may do so only on Your own behalf, and not on
behalf of the Initial Developer or any Contributor. You
must make it absolutely clear that any such warranty,
support, indemnity or liability obligation is offered by
You alone, and You hereby agree to indemnify the Initial
Developer and every Contributor for any liability incurred
by the Initial Developer or such Contributor as a result of
warranty, support, indemnity or liability terms You offer.
3.5. Distribution of Executable Versions.
You may distribute the Executable form of the Covered
Software under the terms of this License or under the terms
of a license of Your choice, which may contain terms
different from this License, provided that You are in
compliance with the terms of this License and that the
license for the Executable form does not attempt to limit
or alter the recipient's rights in the Source Code form
from the rights set forth in this License. If You
distribute the Covered Software in Executable form under a
different license, You must make it absolutely clear that
any terms which differ from this License are offered by You
alone, not by the Initial Developer or Contributor. You
hereby agree to indemnify the Initial Developer and every
Contributor for any liability incurred by the Initial
Developer or such Contributor as a result of any such terms
You offer.
3.6. Larger Works.
You may create a Larger Work by combining Covered Software
with other code not governed by the terms of this License
and distribute the Larger Work as a single product. In such
a case, You must make sure the requirements of this License
are fulfilled for the Covered Software.
4. Versions of the License.
4.1. New Versions.
Sun Microsystems, Inc. is the initial license steward and
may publish revised and/or new versions of this License
from time to time. Each version will be given a
distinguishing version number. Except as provided in
Section 4.3, no one other than the license steward has the
right to modify this License.
4.2. Effect of New Versions.
You may always continue to use, distribute or otherwise
make the Covered Software available under the terms of the
version of the License under which You originally received
the Covered Software. If the Initial Developer includes a
notice in the Original Software prohibiting it from being
distributed or otherwise made available under any
subsequent version of the License, You must distribute and
make the Covered Software available under the terms of the
version of the License under which You originally received
the Covered Software. Otherwise, You may also choose to
use, distribute or otherwise make the Covered Software
available under the terms of any subsequent version of the
License published by the license steward.
4.3. Modified Versions.
When You are an Initial Developer and You want to create a
new license for Your Original Software, You may create and
use a modified version of this License if You: (a) rename
the license and remove any references to the name of the
license steward (except to note that the license differs
from this License); and (b) otherwise make it clear that
the license contains terms which differ from this License.
5. DISCLAIMER OF WARRANTY.
COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY
COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF
ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
DISCLAIMER.
6. TERMINATION.
6.1. This License and the rights granted hereunder will
terminate automatically if You fail to comply with terms
herein and fail to cure such breach within 30 days of
becoming aware of the breach. Provisions which, by their
nature, must remain in effect beyond the termination of
this License shall survive.
6.2. If You assert a patent infringement claim (excluding
declaratory judgment actions) against Initial Developer or
a Contributor (the Initial Developer or Contributor against
whom You assert such claim is referred to as "Participant")
alleging that the Participant Software (meaning the
Contributor Version where the Participant is a Contributor
or the Original Software where the Participant is the
Initial Developer) directly or indirectly infringes any
patent, then any and all rights granted directly or
indirectly to You by such Participant, the Initial
Developer (if the Initial Developer is not the Participant)
and all Contributors under Sections 2.1 and/or 2.2 of this
License shall, upon 60 days notice from Participant
terminate prospectively and automatically at the expiration
of such 60 day notice period, unless if within such 60 day
period You withdraw Your claim with respect to the
Participant Software against such Participant either
unilaterally or pursuant to a written agreement with
Participant.
6.3. In the event of termination under Sections 6.1 or 6.2
above, all end user licenses that have been validly granted
by You or any distributor hereunder prior to termination
(excluding licenses granted to You by any distributor)
shall survive termination.
7. LIMITATION OF LIABILITY.
UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
(INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
APPLY TO YOU.
8. U.S. GOVERNMENT END USERS.
The Covered Software is a "commercial item," as that term is
defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
computer software" (as that term is defined at 48 C.F.R. §
252.227-7014(a)(1)) and "commercial computer software
documentation" as such terms are used in 48 C.F.R. 12.212 (Sept.
1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1
through 227.7202-4 (June 1995), all U.S. Government End Users
acquire Covered Software with only those rights set forth herein.
This U.S. Government Rights clause is in lieu of, and supersedes,
any other FAR, DFAR, or other clause or provision that addresses
Government rights in computer software under this License.
9. MISCELLANEOUS.
This License represents the complete agreement concerning subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the
extent necessary to make it enforceable. This License shall be
governed by the law of the jurisdiction specified in a notice
contained within the Original Software (except to the extent
applicable law, if any, provides otherwise), excluding such
jurisdiction's conflict-of-law provisions. Any litigation
relating to this License shall be subject to the jurisdiction of
the courts located in the jurisdiction and venue specified in a
notice contained within the Original Software, with the losing
party responsible for costs, including, without limitation, court
costs and reasonable attorneys' fees and expenses. The
application of the United Nations Convention on Contracts for the
International Sale of Goods is expressly excluded. Any law or
regulation which provides that the language of a contract shall
be construed against the drafter shall not apply to this License.
You agree that You alone are responsible for compliance with the
United States export administration regulations (and the export
control laws and regulation of any other countries) when You use,
distribute or otherwise make available any Covered Software.
10. RESPONSIBILITY FOR CLAIMS.
As between Initial Developer and the Contributors, each party is
responsible for claims and damages arising, directly or
indirectly, out of its utilization of rights under this License
and You agree to work with Initial Developer and Contributors to
distribute such responsibility on an equitable basis. Nothing
herein is intended or shall be deemed to constitute any admission
of liability.

116
pom-converter.xml Normal file
View File

@@ -0,0 +1,116 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>tools.profiler</groupId>
<artifactId>jfr-converter</artifactId>
<version>4.4</version>
<packaging>jar</packaging>
<name>async-profiler</name>
<url>https://profiler.tools</url>
<description>Low overhead sampling profiler for Java</description>
<licenses>
<license>
<name>Apache License Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<url>https://github.com/async-profiler/async-profiler</url>
<connection>scm:git:git@github.com:async-profiler/async-profiler.git</connection>
<developerConnection>scm:git:git@github.com:async-profiler/async-profiler.git</developerConnection>
</scm>
<developers>
<developer>
<id>apangin</id>
<name>Andrei Pangin</name>
<email>noreply@pangin.pro</email>
</developer>
</developers>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<sourceDirectory>src/converter</sourceDirectory>
<resources>
<resource>
<directory>src/res</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<release>8</release>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>one.convert.Main</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.6</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>central</publishingServerId>
</configuration>
</plugin>
</plugins>
</build>
</project>

164
pom.xml Normal file
View File

@@ -0,0 +1,164 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>tools.profiler</groupId>
<artifactId>async-profiler</artifactId>
<version>4.4</version>
<packaging>jar</packaging>
<name>async-profiler</name>
<url>https://profiler.tools</url>
<description>Low overhead sampling profiler for Java</description>
<licenses>
<license>
<name>Apache License Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<url>https://github.com/async-profiler/async-profiler</url>
<connection>scm:git:git@github.com:async-profiler/async-profiler.git</connection>
<developerConnection>scm:git:git@github.com:async-profiler/async-profiler.git</developerConnection>
</scm>
<developers>
<developer>
<id>apangin</id>
<name>Andrei Pangin</name>
<email>noreply@pangin.pro</email>
</developer>
</developers>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<sourceDirectory>src/api</sourceDirectory>
<resources>
<resource>
<directory>native</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<release>8</release>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>linux-x64-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>linux-x64</classifier>
<includes>
<include>linux-x64/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
<execution>
<id>linux-arm64-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>linux-arm64</classifier>
<includes>
<include>linux-arm64/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
<execution>
<id>macos-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>macos</classifier>
<includes>
<include>macos/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
</executions>
<configuration>
<archive>
<manifestFile>src/api/one/profiler/MANIFEST.MF</manifestFile>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.0</version>
<configuration>
<excludes>
<exclude>linux*/**</exclude>
<exclude>macos*/**</exclude>
</excludes>
</configuration>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.2.0</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.6</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>central</publishingServerId>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -1,254 +0,0 @@
#!/bin/bash
# Print the command-line help on stdout and terminate the script with exit code 1.
usage() {
    cat <<EOF
Usage: $0 [action] [options] <pid>
Actions:
 start start profiling and return immediately
 stop stop profiling
 status print profiling status
 list list profiling events supported by the target JVM
 collect collect profile for the specified period of time
 and then stop (default action)
Options:
 -e event profiling event: cpu|alloc|lock|cache-misses etc.
 -d duration run profiling for <duration> seconds
 -f filename dump output to <filename>
 -i interval sampling interval in nanoseconds
 -j jstackdepth maximum Java stack depth
 -b bufsize frame buffer size
 -t profile different threads separately
 -s simple class names instead of FQN
 -g print method signatures
 -a annotate Java method names
 -o fmt[,fmt...] output format: summary|traces|flat|collapsed|svg|tree|jfr
 -v, --version display version string

 --title string SVG title
 --width px SVG width
 --height px SVG frame height
 --minwidth px skip frames smaller than px
 --reverse generate stack-reversed FlameGraph / Call tree

 --all-kernel only include kernel-mode events
 --all-user only include user-mode events

<pid> is a numeric process ID of the target JVM
 or 'jps' keyword to find running JVM automatically

Example: $0 -d 30 -f profile.svg 3456
 $0 start -i 999000 jps
 $0 stop -o summary,flat jps
EOF
    exit 1
}
# When the profiler output was routed through a temporary file (USE_TMP is
# set), replay that file on the local terminal and remove it afterwards.
# Does nothing if the file does not exist.
mirror_output() {
    if [[ $USE_TMP && -f $FILE ]]; then
        cat "$FILE"
        rm "$FILE"
    fi
}
# Probe the target process with signal 0. If the probe fails (the process is
# gone or cannot be signalled), flush any pending temporary output to the
# terminal and end the script successfully.
check_if_terminated() {
    # PID is quoted so that an unexpected value can never be word-split or
    # glob-expanded into several arguments for kill.
    if ! kill -0 "$PID" 2> /dev/null; then
        mirror_output
        exit 0
    fi
}
# Send the agent command string $1 to the target JVM by running "$JATTACH"
# against process $PID with the "$PROFILER" library.
#
# On a non-zero exit status the script terminates with that status; for status
# 255 it first reports the failed injection and prints the shared-library
# dependencies of the profiler (otool -L on Darwin, ldd elsewhere) to help
# diagnose the problem. On success any temporary output file is mirrored to
# the terminal.
jattach() {
    # PID is quoted so it is always passed to jattach as exactly one argument,
    # regardless of whitespace or glob characters in its value.
    "$JATTACH" "$PID" load "$PROFILER" true "$1" > /dev/null
    RET=$?

    # Check if jattach failed
    if [ $RET -ne 0 ]; then
        if [ $RET -eq 255 ]; then
            echo "Failed to inject profiler into $PID"
            if [ "$UNAME_S" == "Darwin" ]; then
                otool -L "$PROFILER"
            else
                ldd "$PROFILER"
            fi
        fi
        exit $RET
    fi

    mirror_output
}
# Print the absolute, resolved form of path $1.
# On Darwin this uses Perl's Cwd::abs_path; on every other system it uses
# `readlink -f`.
function abspath() {
    case "$UNAME_S" in
        Darwin)
            perl -MCwd -e 'print Cwd::abs_path shift' "$1"
            ;;
        *)
            readlink -f "$1"
            ;;
    esac
}
# ---- Environment detection and default settings ----
OPTIND=1
# Kernel name; abspath and jattach branch on the value "Darwin"
UNAME_S=$(uname -s)
# Directory containing this script (resolved via abspath, so UNAME_S must be set first)
SCRIPT_DIR=$(dirname "$(abspath "$0")")
# Helper binary and agent library are expected under build/ next to the script
JATTACH=$SCRIPT_DIR/build/jattach
PROFILER=$SCRIPT_DIR/build/libasyncProfiler.so
# Defaults that apply when the corresponding command-line argument is absent
ACTION="collect"
EVENT="cpu"
DURATION="60"
FILE=""
# Non-empty means "no -f given": output goes through a temporary file
USE_TMP="true"
# The variables below are appended verbatim to the agent command string;
# each one is either empty or starts with a comma
INTERVAL=""
JSTACKDEPTH=""
FRAMEBUF=""
THREADS=""
RING=""
OUTPUT=""
FORMAT=""
# ---- Command-line parsing ----
# Each iteration consumes $1. Options that take a value read it from $2 and
# perform an extra `shift` inside their branch so that the common `shift` at
# the bottom of the loop skips past the value as well.
while [[ $# -gt 0 ]]; do
case $1 in
-h|"-?")
usage
;;
# Action keyword: overrides the default "collect"
start|stop|status|list|collect)
ACTION="$1"
;;
-v|--version)
ACTION="version"
;;
-e)
EVENT="$2"
shift
;;
-d)
DURATION="$2"
shift
;;
-f)
FILE="$2"
# An explicit output file disables the temporary-file mirroring
unset USE_TMP
shift
;;
-i)
INTERVAL=",interval=$2"
shift
;;
-j)
JSTACKDEPTH=",jstackdepth=$2"
shift
;;
-b)
FRAMEBUF=",framebuf=$2"
shift
;;
-t)
THREADS=",threads"
;;
# Formatting flags accumulate in FORMAT as comma-prefixed tokens
-s)
FORMAT="$FORMAT,simple"
;;
-g)
FORMAT="$FORMAT,sig"
;;
-a)
FORMAT="$FORMAT,ann"
;;
-o)
OUTPUT="$2"
shift
;;
--title)
# escape XML special characters and comma
# ('&' is replaced first so the entities produced below are not escaped again)
TITLE=${2//&/&amp;}
TITLE=${TITLE//</&lt;}
TITLE=${TITLE//>/&gt;}
TITLE=${TITLE//,/&#44;}
FORMAT="$FORMAT,title=$TITLE"
shift
;;
--width|--height|--minwidth)
# ${1:2} strips the leading "--", giving e.g. ",width=<value>"
FORMAT="$FORMAT,${1:2}=$2"
shift
;;
--reverse)
FORMAT="$FORMAT,reverse"
;;
# The two ring options overwrite each other; the last one given wins
--all-kernel)
RING=",allkernel"
;;
--all-user)
RING=",alluser"
;;
# Any argument that starts with a digit is taken as the target PID
[0-9]*)
PID="$1"
;;
jps)
# A shortcut for getting PID of a running Java application
# -XX:+PerfDisableSharedMem prevents jps from appearing in its own list
PID=$(pgrep -n java || jps -q -J-XX:+PerfDisableSharedMem)
;;
*)
echo "Unrecognized option: $1"
usage
;;
esac
shift
done
# A target PID is mandatory for every action except "version".
if [[ -z "$PID" && "$ACTION" != "version" ]]; then
    usage
fi

# If no -f argument is given, use temporary file to transfer output to caller terminal.
# Let the target process create the file in case this script is run by superuser.
if [[ $USE_TMP ]]; then
    FILE=/tmp/async-profiler.$$.$PID
else
    case $FILE in
        /*)
            # already absolute
            ;;
        *)
            # Output file is written by the target process. Make the path absolute to avoid confusion.
            FILE=$PWD/$FILE
            ;;
    esac
fi

# Select the default output format from the file extension when -o was not given.
if [[ -z "$OUTPUT" ]]; then
    case $FILE in
        *.svg)
            OUTPUT="svg"
            ;;
        *.html)
            OUTPUT="tree"
            ;;
        *.jfr)
            OUTPUT="jfr"
            ;;
        *.collapsed|*.folded)
            OUTPUT="collapsed"
            ;;
        *)
            OUTPUT="summary,traces=200,flat=200"
            ;;
    esac
fi
# ---- Execute the requested action ----
# "start" and "collect" send the same start command, "stop" and "collect" the
# same stop command, so both strings are assembled once up front.
START_CMD="start,event=$EVENT,file=$FILE$INTERVAL$JSTACKDEPTH$FRAMEBUF$THREADS$RING,$OUTPUT$FORMAT"
STOP_CMD="stop,file=$FILE,$OUTPUT$FORMAT"

case $ACTION in
    start)
        jattach "$START_CMD"
        ;;
    stop)
        jattach "$STOP_CMD"
        ;;
    status)
        jattach "status,file=$FILE"
        ;;
    list)
        jattach "list,file=$FILE"
        ;;
    collect)
        jattach "$START_CMD"
        # Wait DURATION seconds, checking once per second whether the target is still alive
        while (( DURATION-- > 0 )); do
            check_if_terminated
            sleep 1
        done
        jattach "$STOP_CMD"
        ;;
    version)
        if [[ -z "$PID" ]]; then
            # No target process: query the version by loading the agent into a fresh JVM
            java "-agentpath:$PROFILER=version" -version 2> /dev/null
        else
            jattach "version,file=$FILE"
        fi
        ;;
esac

View File

@@ -1,148 +1,117 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <unistd.h>
#include <sys/mman.h>
#include "allocTracer.h"
#include "os.h"
#include "profiler.h"
#include "stackFrame.h"
#include "tsc.h"
#include "vmStructs.h"
// JDK 7-9
Trap AllocTracer::_in_new_tlab("_ZN11AllocTracer33send_allocation_in_new_tlab_event");
Trap AllocTracer::_outside_tlab("_ZN11AllocTracer34send_allocation_outside_tlab_event");
// JDK 10+
Trap AllocTracer::_in_new_tlab2("_ZN11AllocTracer27send_allocation_in_new_tlab");
Trap AllocTracer::_outside_tlab2("_ZN11AllocTracer28send_allocation_outside_tlab");
int AllocTracer::_trap_kind;
Trap AllocTracer::_in_new_tlab(0);
Trap AllocTracer::_outside_tlab(1);
u64 AllocTracer::_interval;
volatile u64 AllocTracer::_allocated_bytes;
// Resolve the address of the intercepted function
bool Trap::resolve(NativeCodeCache* libjvm) {
if (_entry != NULL) {
return true;
Error AllocTracer::initialize() {
if (_in_new_tlab.entry() == 0 || _outside_tlab.entry() == 0) {
CodeCache* libjvm = VMStructs::libjvm();
const void* ne;
const void* oe;
if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer27send_allocation_in_new_tlab")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer28send_allocation_outside_tlab")) != NULL) {
_trap_kind = 1; // JDK 10+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_eventE11KlassHandleP8HeapWord")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_eventE11KlassHandleP8HeapWord")) != NULL) {
_trap_kind = 1; // JDK 8u262+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_event")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_event")) != NULL) {
_trap_kind = 2; // JDK 7-9
} else {
return Error("No AllocTracer symbols found. Are JDK debug symbols installed?");
}
_in_new_tlab.assign(ne);
_outside_tlab.assign(oe);
_in_new_tlab.pair(_outside_tlab);
}
_entry = (instruction_t*)libjvm->findSymbolByPrefix(_func_name);
if (_entry != NULL) {
// Make the entry point writable, so we can rewrite instructions
long page_size = sysconf(_SC_PAGESIZE);
uintptr_t page_start = (uintptr_t)_entry & -page_size;
mprotect((void*)page_start, page_size, PROT_READ | PROT_WRITE | PROT_EXEC);
return true;
}
return false;
return Error::OK;
}
// Arm the trap: remember the instruction currently at the entry point and
// overwrite it with BREAKPOINT. Does nothing when no entry point is set.
void Trap::install() {
    if (_entry == NULL) {
        return;
    }

    _saved_insn = *_entry;
    *_entry = BREAKPOINT;
    flushCache(_entry);
}
// Disarm the trap: write the saved instruction back to the entry point.
// Does nothing when no entry point is set.
void Trap::uninstall() {
    if (_entry == NULL) {
        return;
    }

    *_entry = _saved_insn;
    flushCache(_entry);
}
// Called whenever our breakpoint trap is hit
void AllocTracer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
void AllocTracer::trapHandler(int signo, siginfo_t* siginfo, void* ucontext) {
StackFrame frame(ucontext);
EventType event_type;
uintptr_t total_size;
uintptr_t instance_size;
// PC points either to BREAKPOINT instruction or to the next one
if (frame.pc() - (uintptr_t)_in_new_tlab._entry <= sizeof(instruction_t)) {
// send_allocation_in_new_tlab_event(KlassHandle klass, size_t tlab_size, size_t alloc_size)
recordAllocation(ucontext, frame.arg0(), frame.arg1(), false);
} else if (frame.pc() - (uintptr_t)_outside_tlab._entry <= sizeof(instruction_t)) {
// send_allocation_outside_tlab_event(KlassHandle klass, size_t alloc_size);
recordAllocation(ucontext, frame.arg0(), frame.arg1(), true);
} else if (frame.pc() - (uintptr_t)_in_new_tlab2._entry <= sizeof(instruction_t)) {
if (_in_new_tlab.covers(frame.pc())) {
// send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, Thread* thread)
recordAllocation(ucontext, frame.arg0(), frame.arg2(), false);
} else if (frame.pc() - (uintptr_t)_outside_tlab2._entry <= sizeof(instruction_t)) {
// send_allocation_in_new_tlab_event(KlassHandle klass, size_t tlab_size, size_t alloc_size)
event_type = ALLOC_SAMPLE;
total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
instance_size = _trap_kind == 1 ? frame.arg3() : frame.arg2();
} else if (_outside_tlab.covers(frame.pc())) {
// send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, Thread* thread)
recordAllocation(ucontext, frame.arg0(), frame.arg2(), true);
// send_allocation_outside_tlab_event(KlassHandle klass, size_t alloc_size);
event_type = ALLOC_OUTSIDE_TLAB;
total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
instance_size = 0;
} else {
// Not our trap; nothing to do
// Not our trap
Profiler::instance()->trapHandler(signo, siginfo, ucontext);
return;
}
// Leave the trapped function by simulating "ret" instruction
uintptr_t klass = frame.arg0();
frame.ret();
if (_enabled && updateCounter(_allocated_bytes, total_size, _interval)) {
recordAllocation(ucontext, event_type, klass, total_size, instance_size);
}
}
void AllocTracer::recordAllocation(void* ucontext, uintptr_t rklass, uintptr_t rsize, bool outside_tlab) {
if (_interval) {
// Do not record allocation unless allocated at least _interval bytes
while (true) {
u64 prev = _allocated_bytes;
u64 next = prev + rsize;
if (next < _interval) {
if (__sync_bool_compare_and_swap(&_allocated_bytes, prev, next)) {
return;
}
} else {
if (__sync_bool_compare_and_swap(&_allocated_bytes, prev, next % _interval)) {
break;
}
}
}
void AllocTracer::recordAllocation(void* ucontext, EventType event_type, uintptr_t rklass,
uintptr_t total_size, uintptr_t instance_size) {
AllocEvent event;
event._start_time = TSC::ticks();
event._class_id = 0;
event._total_size = total_size;
event._instance_size = instance_size;
if (VMStructs::hasClassNames()) {
VMSymbol* symbol = VMKlass::fromHandle(rklass)->name();
event._class_id = Profiler::instance()->classMap()->lookup(symbol->body(), symbol->length());
}
VMSymbol* symbol = VMKlass::fromHandle(rklass)->name();
if (outside_tlab) {
// Invert the last bit to distinguish jmethodID from the allocation in new TLAB
Profiler::_instance.recordSample(ucontext, rsize, BCI_SYMBOL_OUTSIDE_TLAB, (jmethodID)((uintptr_t)symbol ^ 1));
} else {
Profiler::_instance.recordSample(ucontext, rsize, BCI_SYMBOL, (jmethodID)symbol);
}
Profiler::instance()->recordSample(ucontext, total_size, event_type, &event);
}
Error AllocTracer::start(Arguments& args) {
if (!VMStructs::available()) {
return Error("VMStructs unavailable. Unsupported JVM?");
if (args._live && !args._all) {
// This engine is only going to be selected in Profiler::selectAllocEngine
// when can_generate_sampled_object_alloc_events is not available, i.e. JDK<11.
return Error("'live' option is supported on OpenJDK 11+");
}
NativeCodeCache* libjvm = Profiler::_instance.jvmLibrary();
if (!(_in_new_tlab.resolve(libjvm) || _in_new_tlab2.resolve(libjvm)) ||
!(_outside_tlab.resolve(libjvm) || _outside_tlab2.resolve(libjvm))) {
return Error("No AllocTracer symbols found. Are JDK debug symbols installed?");
}
Error error = initialize();
if (error) return error;
_interval = args._interval;
_interval = args._alloc > 0 ? args._alloc : 0;
_allocated_bytes = 0;
OS::installSignalHandler(SIGTRAP, signalHandler);
_in_new_tlab.install();
_outside_tlab.install();
_in_new_tlab2.install();
_outside_tlab2.install();
if (!_in_new_tlab.install() || !_outside_tlab.install()) {
return Error("Cannot install allocation breakpoints");
}
return Error::OK;
}
@@ -150,6 +119,4 @@ Error AllocTracer::start(Arguments& args) {
void AllocTracer::stop() {
_in_new_tlab.uninstall();
_outside_tlab.uninstall();
_in_new_tlab2.uninstall();
_outside_tlab2.uninstall();
}

View File

@@ -1,17 +1,6 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _ALLOCTRACER_H
@@ -19,48 +8,31 @@
#include <signal.h>
#include <stdint.h>
#include "arch.h"
#include "codeCache.h"
#include "engine.h"
// Describes OpenJDK function being intercepted.
// A Trap is created with the name of the function, resolved against libjvm,
// and then armed/disarmed with install()/uninstall().
class Trap {
private:
// Name of the function to intercept, as passed to the constructor
const char* _func_name;
// Entry point of the function; NULL until resolve() succeeds
instruction_t* _entry;
// Instruction that install() replaced, kept so uninstall() can restore it
instruction_t _saved_insn;
public:
// Only records the name; the entry point stays unresolved (NULL)
Trap(const char* func_name) : _func_name(func_name), _entry(NULL) {
}
// Looks up the entry point in the given code cache; returns whether it is known
bool resolve(NativeCodeCache* libjvm);
// Patches a breakpoint into the entry point
void install();
// Restores the original instruction at the entry point
void uninstall();
// AllocTracer reads _entry directly in its signal handler
friend class AllocTracer;
};
#include "event.h"
#include "trap.h"
class AllocTracer : public Engine {
private:
// JDK 7-9
static int _trap_kind;
static Trap _in_new_tlab;
static Trap _outside_tlab;
// JDK 10+
static Trap _in_new_tlab2;
static Trap _outside_tlab2;
static u64 _interval;
static volatile u64 _allocated_bytes;
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
static void recordAllocation(void* ucontext, uintptr_t rklass, uintptr_t rsize, bool outside_tlab);
static Error initialize();
static void recordAllocation(void* ucontext, EventType event_type, uintptr_t rklass,
uintptr_t total_size, uintptr_t instance_size);
public:
const char* name() {
return "alloc";
const char* type() {
return "alloc_tracer";
}
const char* title() {
return "Allocation profile";
}
const char* units() {
@@ -69,6 +41,8 @@ class AllocTracer : public Engine {
Error start(Arguments& args);
void stop();
static void trapHandler(int signo, siginfo_t* siginfo, void* ucontext);
};
#endif // _ALLOCTRACER_H

View File

@@ -0,0 +1,26 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
import javax.management.ObjectName;
import java.lang.management.ManagementFactory;
/**
 * Java agent entry points for async-profiler.
 * Registers the profiler as an MXBean on the platform MBean server and
 * optionally executes a profiler command passed as the agent arguments.
 */
public class Agent {

    /**
     * Entry point used when the agent is specified on the JVM command line.
     * Behaves exactly like {@link #agentmain(String)}.
     *
     * @param args agent arguments: a profiler command, or {@code null}/empty for none
     * @throws Exception if MXBean registration or command execution fails
     */
    public static void premain(String args) throws Exception {
        agentmain(args);
    }

    /**
     * Entry point used when the agent is loaded into a running JVM.
     * Safe to invoke more than once: an already existing MXBean registration
     * is kept, and the command in {@code args} is still executed.
     *
     * @param args agent arguments: a profiler command, or {@code null}/empty for none
     * @throws Exception if MXBean registration or command execution fails
     */
    public static void agentmain(String args) throws Exception {
        AsyncProfiler profiler = AsyncProfiler.getInstance();
        ObjectName name = new ObjectName(AsyncProfilerMXBean.OBJECT_NAME);

        try {
            ManagementFactory.getPlatformMBeanServer().registerMBean(profiler, name);
        } catch (javax.management.InstanceAlreadyExistsException alreadyRegistered) {
            // The agent was loaded before (e.g. -javaagent followed by a dynamic
            // attach). The bean is already exposed, so this must not prevent
            // the command below from running.
        }

        if (args != null && !args.isEmpty()) {
            profiler.execute(args);
        }
    }
}

View File

@@ -0,0 +1,300 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* Java API for in-process profiling. Serves as a wrapper around
* async-profiler native library. This class is a singleton.
* The first call to {@link #getInstance()} initiates loading of
* libasyncProfiler.so.
*/
public class AsyncProfiler implements AsyncProfilerMXBean {
private static AsyncProfiler instance;
private AsyncProfiler() {
}
/**
 * Returns the singleton profiler instance without specifying a library path.
 * Equivalent to {@code getInstance(null)}.
 *
 * @return the shared {@code AsyncProfiler} instance
 */
public static AsyncProfiler getInstance() {
return getInstance(null);
}
/**
 * Returns the singleton profiler instance, creating it and loading the
 * native library on the first call. Later calls return the existing
 * instance and ignore {@code libPath}.
 *
 * <p>When {@code libPath} is {@code null} the library is located in this order:
 * <ol>
 *   <li>already loaded (a native call succeeds) - nothing to do;</li>
 *   <li>the path in system property {@code one.profiler.libraryPath};</li>
 *   <li>a copy embedded in the class path, extracted to a temporary file
 *       that is deleted right after loading;</li>
 *   <li>{@code System.loadLibrary("asyncProfiler")}.</li>
 * </ol>
 *
 * @param libPath path passed to {@link System#load(String)}, or {@code null}
 *                to locate the library automatically
 * @return the shared {@code AsyncProfiler} instance
 */
public static synchronized AsyncProfiler getInstance(String libPath) {
    if (instance == null) {
        AsyncProfiler created = new AsyncProfiler();

        if (libPath != null) {
            System.load(libPath);
        } else {
            boolean preloaded;
            try {
                // No need to load library, if it has been preloaded with -agentpath
                created.getVersion();
                preloaded = true;
            } catch (UnsatisfiedLinkError notLoadedYet) {
                preloaded = false;
            }

            if (!preloaded) {
                String configuredPath = System.getProperty("one.profiler.libraryPath");
                if (configuredPath == null || configuredPath.isEmpty()) {
                    File embedded = extractEmbeddedLib();
                    if (embedded == null) {
                        System.loadLibrary("asyncProfiler");
                    } else {
                        try {
                            System.load(embedded.getAbsolutePath());
                        } finally {
                            embedded.delete();
                        }
                    }
                } else {
                    System.load(new File(configuredPath).getAbsolutePath());
                }
            }
        }

        instance = created;
    }
    return instance;
}
/**
 * Copies the native library bundled as a class path resource for the
 * current platform into a temporary file.
 *
 * <p>The file is created in the directory named by system property
 * {@code one.profiler.extractPath}, or in the default temporary directory
 * when that property is unset or empty. If extraction fails, the partially
 * written temporary file is removed before the exception propagates.
 *
 * @return the extracted file, or {@code null} if no library is bundled for this platform
 * @throws IllegalStateException if the temporary file cannot be created or written
 */
private static File extractEmbeddedLib() {
    String resourceName = "/" + getPlatformTag() + "/libasyncProfiler.so";
    InputStream in = AsyncProfiler.class.getResourceAsStream(resourceName);
    if (in == null) {
        return null;
    }

    File file = null;
    boolean extracted = false;
    try {
        String extractPath = System.getProperty("one.profiler.extractPath");
        file = File.createTempFile("libasyncProfiler-", ".so",
                extractPath == null || extractPath.isEmpty() ? null : new File(extractPath));
        try (FileOutputStream out = new FileOutputStream(file)) {
            byte[] buf = new byte[32000];
            for (int bytes; (bytes = in.read(buf)) >= 0; ) {
                out.write(buf, 0, bytes);
            }
        }
        extracted = true;
        return file;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    } finally {
        // Do not leave a truncated library behind when the copy did not complete
        if (!extracted && file != null) {
            file.delete();
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // best effort: the resource stream is read-only, nothing to recover
        }
    }
}
/**
 * Maps the {@code os.name} and {@code os.arch} system properties to the
 * name of the resource directory holding the matching native library.
 *
 * @return one of {@code linux-x64}, {@code linux-arm64}, {@code linux-arm32},
 *         {@code linux-x86}, {@code linux-ppc64le} or {@code macos}
 * @throws UnsupportedOperationException if the OS/architecture pair is not recognized
 */
private static String getPlatformTag() {
    final String osName = System.getProperty("os.name").toLowerCase();
    final String cpu = System.getProperty("os.arch").toLowerCase();

    if (osName.contains("linux")) {
        // Order matters: the 64-bit checks must run before the broader
        // "arm" and "86" substring checks.
        if (cpu.equals("amd64") || cpu.equals("x86_64") || cpu.contains("x64")) {
            return "linux-x64";
        }
        if (cpu.equals("aarch64") || cpu.contains("arm64")) {
            return "linux-arm64";
        }
        if (cpu.equals("aarch32") || cpu.contains("arm")) {
            return "linux-arm32";
        }
        if (cpu.contains("86")) {
            return "linux-x86";
        }
        if (cpu.contains("ppc64")) {
            return "linux-ppc64le";
        }
    } else if (osName.contains("mac")) {
        return "macos";
    }

    throw new UnsupportedOperationException("Unsupported platform: " + osName + "-" + cpu);
}
/**
 * Start profiling.
 * Delegates to the native {@code start0} with its last argument set to
 * {@code true} (presumably "reset previously collected data" - compare
 * {@link #resume(String, long)}, which passes {@code false}).
 *
 * @param event Profiling event, see {@link Events}
 * @param interval Sampling interval, e.g. nanoseconds for Events.CPU
 * @throws NullPointerException If {@code event} is {@code null}
 * @throws IllegalStateException If profiler is already running
 */
@Override
public void start(String event, long interval) throws IllegalStateException {
// Fail fast in Java instead of handing a null string to native code
if (event == null) {
throw new NullPointerException();
}
start0(event, interval, true);
}
/**
 * Start or resume profiling without resetting collected data.
 * Note that event and interval may change since the previous profiling session.
 * Delegates to the native {@code start0} with its last argument set to {@code false}.
 *
 * @param event Profiling event, see {@link Events}
 * @param interval Sampling interval, e.g. nanoseconds for Events.CPU
 * @throws NullPointerException If {@code event} is {@code null}
 * @throws IllegalStateException If profiler is already running
 */
@Override
public void resume(String event, long interval) throws IllegalStateException {
// Fail fast in Java instead of handing a null string to native code
if (event == null) {
throw new NullPointerException();
}
start0(event, interval, false);
}
/**
 * Stop profiling (without dumping results).
 * This is a thin wrapper that delegates to the native {@code stop0()} method.
 *
 * @throws IllegalStateException If profiler is not running
 */
@Override
public void stop() throws IllegalStateException {
stop0();
}
/**
 * Get the number of samples collected during the profiling session.
 * The method is implemented in native code.
 *
 * @return Number of samples
 */
@Override
public native long getSamples();
/**
 * Returns the version of the profiler agent, e.g. "1.0".
 * The value is obtained by executing the "version" command.
 *
 * @return Version string
 * @throws IllegalStateException If the command fails with an {@link IOException}
 */
@Override
public String getVersion() {
    final String version;
    try {
        version = execute0("version");
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
    return version;
}
/**
 * Runs an agent-compatible profiling command, i.e. the comma-separated
 * list of arguments defined in arguments.cpp.
 *
 * @param command Profiling command; must not be {@code null}
 * @return The command result
 * @throws IllegalArgumentException If failed to parse the command
 * @throws IOException              If failed to create output file
 * @throws NullPointerException     If {@code command} is {@code null}
 */
@Override
public String execute(String command) throws IllegalArgumentException, IllegalStateException, IOException {
    return execute0(java.util.Objects.requireNonNull(command));
}
/**
 * Dumps the profile in the 'collapsed stacktraces' format.
 *
 * @param counter Which counter to display in the output; {@link Counter#SAMPLES}
 *                selects "samples", any other value selects "total"
 * @return Textual representation of the profile
 * @throws IllegalStateException If the command fails with an {@link IOException}
 */
@Override
public String dumpCollapsed(Counter counter) {
    final String counterName;
    if (counter == Counter.SAMPLES) {
        counterName = "samples";
    } else {
        counterName = "total";
    }
    final String command = "collapsed," + counterName;
    try {
        return execute0(command);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Dumps the collected stack traces.
 *
 * @param maxTraces Maximum number of stack traces to dump. 0 means no limit
 * @return Textual representation of the profile
 * @throws IllegalStateException If the command fails with an {@link IOException}
 */
@Override
public String dumpTraces(int maxTraces) {
    String command = "traces";
    if (maxTraces != 0) {
        // A non-zero limit is passed as the option value
        command = "traces=" + maxTraces;
    }
    try {
        return execute0(command);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Dumps the flat profile, i.e. the histogram of the hottest methods.
 *
 * @param maxMethods Maximum number of methods to dump. 0 means no limit
 * @return Textual representation of the profile
 * @throws IllegalStateException If the command fails with an {@link IOException}
 */
@Override
public String dumpFlat(int maxMethods) {
    String command = "flat";
    if (maxMethods != 0) {
        // A non-zero limit is passed as the option value
        command = "flat=" + maxMethods;
    }
    try {
        return execute0(command);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Dumps the collected data in OTLP format.
 * <p>
 * This API is UNSTABLE and might change or be removed in the next version of async-profiler.
 *
 * @param counter Which counter to use for aggregation; {@link Counter#SAMPLES}
 *                selects "samples", any other value selects "total"
 * @return OTLP representation of the profile
 * @throws IllegalStateException If the command fails with an {@link IOException}
 */
@Override
public byte[] dumpOtlp(Counter counter) {
    final String counterName;
    if (counter == Counter.SAMPLES) {
        counterName = "samples";
    } else {
        counterName = "total";
    }
    final String command = "otlp," + counterName;
    try {
        return execute1(command);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Add the given thread to the set of profiled threads.
 * 'filter' option must be enabled to use this method.
 * <p>
 * A {@code null} argument is handled the same way as the current thread.
 * A thread that is in the NEW or TERMINATED state is ignored.
 *
 * @param thread Thread to include in profiling
 */
public void addThread(Thread thread) {
filterThread(thread, true);
}
/**
 * Remove the given thread from the set of profiled threads.
 * 'filter' option must be enabled to use this method.
 * <p>
 * A {@code null} argument is handled the same way as the current thread.
 * A thread that is in the NEW or TERMINATED state is ignored.
 *
 * @param thread Thread to exclude from profiling
 */
public void removeThread(Thread thread) {
filterThread(thread, false);
}
/**
 * Common implementation of {@link #addThread(Thread)} and {@link #removeThread(Thread)}.
 *
 * @param thread Thread to add or remove; {@code null} is treated like the current thread
 * @param enable {@code true} to include the thread in profiling, {@code false} to exclude it
 */
private void filterThread(Thread thread, boolean enable) {
    boolean isCurrentThread = thread == null || thread == Thread.currentThread();
    if (isCurrentThread) {
        // The native method receives null for the calling thread
        filterThread0(null, enable);
        return;
    }

    // The thread's monitor is held to avoid a race with a thread state change
    synchronized (thread) {
        Thread.State state = thread.getState();
        boolean notStartedOrFinished = state == Thread.State.NEW || state == Thread.State.TERMINATED;
        if (!notStartedOrFinished) {
            filterThread0(thread, enable);
        }
    }
}
// Native entry points.

// Backs start() (reset = true) and resume() (reset = false)
private native void start0(String event, long interval, boolean reset) throws IllegalStateException;
// Backs stop()
private native void stop0() throws IllegalStateException;
// Executes a profiler command and returns the result as a String
private native String execute0(String command) throws IllegalArgumentException, IllegalStateException, IOException;
// Executes a profiler command and returns the result as a byte array; used by dumpOtlp()
private native byte[] execute1(String command) throws IllegalArgumentException, IllegalStateException, IOException;
// Backs addThread()/removeThread(); callers pass null for the current thread
private native void filterThread0(Thread thread, boolean enable);
}

View File

@@ -1,17 +1,6 @@
/*
* Copyright 2018 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
@@ -28,15 +17,19 @@ package one.profiler;
* }</pre>
*/
public interface AsyncProfilerMXBean {
String OBJECT_NAME = "one.profiler:type=AsyncProfiler";
void start(String event, long interval) throws IllegalStateException;
void resume(String event, long interval) throws IllegalStateException;
void stop() throws IllegalStateException;
long getSamples();
String getVersion();
String execute(String command) throws IllegalArgumentException, java.io.IOException;
String execute(String command) throws IllegalArgumentException, IllegalStateException, java.io.IOException;
String dumpCollapsed(Counter counter);
String dumpTraces(int maxTraces);
String dumpFlat(int maxMethods);
byte[] dumpOtlp(Counter counter);
}

View File

@@ -0,0 +1,14 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
/**
 * Which metric to use when aggregating a profile, e.g. when generating
 * the output in collapsed stack traces format or in OTLP format.
 */
public enum Counter {
// Aggregate by the number of samples
SAMPLES,
// Aggregate by the total value of the counter
TOTAL
}

View File

@@ -0,0 +1,18 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
/**
 * Predefined event names to use in {@link AsyncProfiler#start(String, long)}.
 * The class only holds string constants.
 */
public class Events {
public static final String CPU = "cpu";
public static final String ALLOC = "alloc";
public static final String LOCK = "lock";
public static final String WALL = "wall";
public static final String CTIMER = "ctimer";
public static final String ITIMER = "itimer";
}

View File

@@ -0,0 +1,2 @@
Agent-Class: one.profiler.Agent
Premain-Class: one.profiler.Agent

View File

@@ -1,68 +1,203 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _ARCH_H
#define _ARCH_H
#ifndef likely
# define likely(x) (__builtin_expect(!!(x), 1))
#endif
#ifndef unlikely
# define unlikely(x) (__builtin_expect(!!(x), 0))
#endif
#ifdef _LP64
# define LP64_ONLY(code) code
#else // !_LP64
# define LP64_ONLY(code)
#endif // _LP64
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
static inline u64 atomicInc(volatile u64& var, u64 increment = 1) {
return __sync_fetch_and_add(&var, increment);
template<typename T>
static inline T atomicInc(T& var, T increment = 1) {
return __atomic_fetch_add(&var, increment, __ATOMIC_ACQ_REL);
}
static inline int atomicInc(volatile int& var, int increment = 1) {
return __sync_fetch_and_add(&var, increment);
template<typename T>
static inline T atomicDec(T& var, T decrement = 1) {
return __atomic_fetch_sub(&var, decrement, __ATOMIC_ACQ_REL);
}
// Atomically reads the value of var with acquire memory ordering
template<typename T>
static inline T loadAcquire(T& var) {
return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
}
// Atomically writes value, converted to T, into var with release memory ordering
template<typename T, typename U>
static inline void storeRelease(T& var, U value) {
__atomic_store_n(&var, static_cast<T>(value), __ATOMIC_RELEASE);
}
#if defined(__x86_64__) || defined(__i386__)
typedef unsigned char instruction_t;
const instruction_t BREAKPOINT = 0xcc;
const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = 2;
const int FRAME_PC_SLOT = 1;
const int PLT_HEADER_SIZE = 16;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 8; // PERF_REG_X86_IP
#define spinPause() asm volatile("pause")
#define rmb() asm volatile("lfence" : : : "memory")
#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r"(addr) : "memory")
#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory")
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() ((void**)__builtin_frame_address(0) + 2)
#elif defined(__arm__) || defined(__thumb__)
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xe7f001f0;
const instruction_t BREAKPOINT_THUMB = 0xde01de01;
const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PLT_HEADER_SIZE = 20;
const int PLT_ENTRY_SIZE = 12;
const int PERF_REG_PC = 15; // PERF_REG_ARM_PC
#define spinPause() asm volatile("yield")
#define rmb() asm volatile("dmb ish" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(1)
#elif defined(__aarch64__)
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xd4200000;
const int BREAKPOINT_OFFSET = 0;
#define spinPause() asm volatile("yield")
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC
#define spinPause() asm volatile("isb")
#define rmb() asm volatile("dmb ish" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#else
#warning "Compiling on unsupported arch"
#define callerPC() ({ void* pc; asm volatile("adr %0, ." : "=r"(pc)); pc; })
#define callerFP() ({ void* fp; asm volatile("mov %0, fp" : "=r"(fp)); fp; })
#define callerSP() ({ void* sp; asm volatile("mov %0, sp" : "=r"(sp)); sp; })
#define spinPause()
#define rmb() __sync_synchronize()
#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0x7fe00008;
// We place the break point in the third instruction slot on PPCLE as the first two are skipped if
// the call comes from within the same compilation unit according to the LE ABI.
const int BREAKPOINT_OFFSET = 8;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 2;
const int PLT_HEADER_SIZE = 24;
const int PLT_ENTRY_SIZE = 24;
const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP
#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems
#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
#elif defined(__riscv) && (__riscv_xlen == 64)
typedef unsigned int instruction_t;
#if defined(__riscv_compressed)
const instruction_t BREAKPOINT = 0x9002; // EBREAK (compressed form)
#else
const instruction_t BREAKPOINT = 0x00100073; // EBREAK
#endif
const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1; // return address is at -1 from FP
const int PLT_HEADER_SIZE = 24; // Best guess from examining readelf
const int PLT_ENTRY_SIZE = 24; // ...same...
const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC
#define spinPause() // No architecture support
#define rmb() asm volatile ("fence" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
#elif defined(__loongarch_lp64)
typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0x002a0005; // EBREAK
const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC
#define spinPause() asm volatile("ibar 0x0")
#define rmb() asm volatile("dbar 0x0" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
#else
#error "Compiling on unsupported arch"
#endif
// On Apple M1 and later processors, memory is either writable or executable (W^X)
#if defined(__aarch64__) && defined(__APPLE__)
# define WX_MEMORY true
#else
# define WX_MEMORY false
#endif
// Pointer authentication (PAC) support.
// Only 48-bit virtual addresses are currently supported.
#ifdef __aarch64__
// Address bits preserved by stripPointer(): the low 47 bits when WX_MEMORY is true,
// the low 48 bits otherwise
const unsigned long PAC_MASK = WX_MEMORY ? 0x7fffffffffffUL : 0xffffffffffffUL;
// Returns p with all bits outside of PAC_MASK cleared
static inline const void* stripPointer(const void* p) {
return (const void*) ((unsigned long)p & PAC_MASK);
}
#else
// On other architectures the pointer is used as is
# define stripPointer(p) (p)
#endif

View File

@@ -1,17 +1,6 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <limits.h>
@@ -22,52 +11,33 @@
#include <sys/types.h>
#include <unistd.h>
#include "arguments.h"
#include "os.h"
// Arguments of the last start/resume command; reused for shutdown and restart
Arguments _global_args;
// Predefined value that denotes successful operation
const Error Error::OK(NULL);
// Extra buffer space for expanding file pattern
const size_t EXTRA_BUF_SIZE = 512;
// Statically compute hash code of a string containing up to 12 [a-z] letters
#define HASH(s) ((s[0] & 31LL) | (s[1] & 31LL) << 5 | (s[2] & 31LL) << 10 | (s[3] & 31LL) << 15 | \
(s[4] & 31LL) << 20 | (s[5] & 31LL) << 25 | (s[6] & 31LL) << 30 | (s[7] & 31LL) << 35 | \
(s[8] & 31LL) << 40 | (s[9] & 31LL) << 45 | (s[10] & 31LL) << 50 | (s[11] & 31LL) << 55)
// Simulate switch statement over string hashes
#define SWITCH(arg) long long arg_hash = hash(arg); if (0)
#define CASE(s) } else if (arg_hash == HASH(s " ")) {
#define DEFAULT() } else {
// Parses agent arguments.
// The format of the string is:
// arg[,arg...]
// where arg is one of the following options:
// start - start profiling
// stop - stop profiling
// status - print profiling status (inactive / running for X seconds)
// list - show the list of available profiling events
// version - display the agent version
// event=EVENT - which event to trace (cpu, alloc, lock, cache-misses etc.)
// collapsed[=C] - dump collapsed stacks (the format used by FlameGraph script)
// svg[=C] - produce Flame Graph in SVG format
// tree[=C] - produce call tree in HTML format
// C is counter type: 'samples' or 'total'
// jfr - dump events in Java Flight Recorder format
// summary - dump profiling summary (number of collected samples of each type)
// traces[=N] - dump top N call traces
// flat[=N] - dump top N methods (aka flat profile)
// interval=N - sampling interval in ns (default: 10'000'000, i.e. 10 ms)
// jstackdepth=N - maximum Java stack depth (default: 2048)
// framebuf=N - size of the buffer for stack frames (default: 1'000'000)
// threads - profile different threads separately
// allkernel - include only kernel-mode events
// alluser - include only user-mode events
// simple - simple class names instead of FQN
// dot - dotted class names
// sig - print method signatures
// ann - annotate Java method names
// title=TITLE - FlameGraph title
// width=PX - FlameGraph image width
// height=PX - FlameGraph frame height
// minwidth=PX - FlameGraph minimum frame width
// reverse - generate stack-reversed FlameGraph / Call tree
// file=FILENAME - output file name for dumping
//
// It is possible to specify multiple dump options at the same time
// The format of the string is: arg[,arg...]
Error Arguments::parse(const char* args) {
if (args == NULL) {
return Error::OK;
@@ -75,108 +45,423 @@ Error Arguments::parse(const char* args) {
size_t len = strlen(args);
free(_buf);
_buf = (char*)malloc(len + EXTRA_BUF_SIZE);
_buf = (char*)malloc(len + EXTRA_BUF_SIZE + 1);
if (_buf == NULL) {
return Error("Not enough memory to parse arguments");
}
strcpy(_buf, args);
char* args_copy = strcpy(_buf + EXTRA_BUF_SIZE, args);
for (char* arg = strtok(_buf, ","); arg != NULL; arg = strtok(NULL, ",")) {
const char* msg = NULL;
for (char* arg = strtok(args_copy, ","); arg != NULL; arg = strtok(NULL, ",")) {
char* value = strchr(arg, '=');
if (value != NULL) *value++ = 0;
if (strcmp(arg, "start") == 0) {
_action = ACTION_START;
} else if (strcmp(arg, "stop") == 0) {
_action = ACTION_STOP;
} else if (strcmp(arg, "status") == 0) {
_action = ACTION_STATUS;
} else if (strcmp(arg, "list") == 0) {
_action = ACTION_LIST;
} else if (strcmp(arg, "version") == 0) {
_action = ACTION_VERSION;
} else if (strcmp(arg, "event") == 0) {
if (value == NULL || value[0] == 0) {
return Error("event must not be empty");
}
_event = value;
} else if (strcmp(arg, "collapsed") == 0 || strcmp(arg, "folded") == 0) {
_dump_collapsed = true;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
} else if (strcmp(arg, "flamegraph") == 0 || strcmp(arg, "svg") == 0) {
_dump_flamegraph = true;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
} else if (strcmp(arg, "tree") == 0) {
_dump_tree = true;
_counter = value == NULL || strcmp(value, "samples") == 0 ? COUNTER_SAMPLES : COUNTER_TOTAL;
} else if (strcmp(arg, "jfr") == 0) {
_dump_jfr = true;
} else if (strcmp(arg, "summary") == 0) {
_dump_summary = true;
} else if (strcmp(arg, "traces") == 0) {
_dump_traces = value == NULL ? INT_MAX : atoi(value);
} else if (strcmp(arg, "flat") == 0) {
_dump_flat = value == NULL ? INT_MAX : atoi(value);
} else if (strcmp(arg, "interval") == 0) {
if (value == NULL || (_interval = parseUnits(value)) <= 0) {
return Error("interval must be > 0");
}
} else if (strcmp(arg, "jstackdepth") == 0) {
if (value == NULL || (_jstackdepth = atoi(value)) <= 0) {
return Error("jstackdepth must be > 0");
}
} else if (strcmp(arg, "framebuf") == 0) {
if (value == NULL || (_framebuf = atoi(value)) <= 0) {
return Error("framebuf must be > 0");
}
} else if (strcmp(arg, "threads") == 0) {
_threads = true;
} else if (strcmp(arg, "allkernel") == 0) {
_ring = RING_KERNEL;
} else if (strcmp(arg, "alluser") == 0) {
_ring = RING_USER;
} else if (strcmp(arg, "simple") == 0) {
_style |= STYLE_SIMPLE;
} else if (strcmp(arg, "dot") == 0) {
_style |= STYLE_DOTTED;
} else if (strcmp(arg, "sig") == 0) {
_style |= STYLE_SIGNATURES;
} else if (strcmp(arg, "ann") == 0) {
_style |= STYLE_ANNOTATE;
} else if (strcmp(arg, "title") == 0 && value != NULL) {
_title = value;
} else if (strcmp(arg, "width") == 0 && value != NULL) {
_width = atoi(value);
} else if (strcmp(arg, "height") == 0 && value != NULL) {
_height = atoi(value);
} else if (strcmp(arg, "minwidth") == 0 && value != NULL) {
_minwidth = atof(value);
} else if (strcmp(arg, "reverse") == 0) {
_reverse = true;
} else if (strcmp(arg, "file") == 0) {
if (value == NULL || value[0] == 0) {
return Error("file must not be empty");
}
_file = value;
SWITCH (arg) {
// Actions
CASE("start")
_action = ACTION_START;
CASE("resume")
_action = ACTION_RESUME;
CASE("stop")
_action = ACTION_STOP;
CASE("dump")
_action = ACTION_DUMP;
CASE("status")
_action = ACTION_STATUS;
CASE("metrics")
_action = ACTION_METRICS;
CASE("list")
_action = ACTION_LIST;
CASE("version")
_action = ACTION_VERSION;
// Output formats
CASE("collapsed")
_output = OUTPUT_COLLAPSED;
CASE("flamegraph")
_output = OUTPUT_FLAMEGRAPH;
CASE("tree")
_output = OUTPUT_TREE;
CASE("jfr")
_output = OUTPUT_JFR;
CASE("jfropts")
_output = OUTPUT_JFR;
if (value == NULL) {
msg = "Invalid jfropts";
} else if (value[0] >= '0' && value[0] <= '9') {
_jfr_options = (int)strtol(value, NULL, 0);
} else if (strstr(value, "mem")) {
_jfr_options |= IN_MEMORY;
}
CASE("jfrsync")
_output = OUTPUT_JFR;
_jfr_options |= JFR_SYNC_OPTS;
_jfr_sync = value == NULL ? "default" : value;
CASE("traces")
_output = OUTPUT_TEXT;
_dump_traces = value == NULL ? INT_MAX : atoi(value);
CASE("flat")
_output = OUTPUT_TEXT;
_dump_flat = value == NULL ? INT_MAX : atoi(value);
CASE("otlp")
_output = OUTPUT_OTLP;
CASE("samples")
_counter = COUNTER_SAMPLES;
CASE("total")
_counter = COUNTER_TOTAL;
CASE("chunksize")
if (value == NULL || (_chunk_size = parseUnits(value, BYTES)) < 0) {
msg = "Invalid chunksize";
}
CASE("chunktime")
if (value == NULL || (_chunk_time = parseUnits(value, SECONDS)) < 0) {
msg = "Invalid chunktime";
}
// Basic options
CASE("event")
if (value == NULL || value[0] == 0) {
msg = "event must not be empty";
} else if (strcmp(value, EVENT_ALLOC) == 0) {
if (_alloc < 0) _alloc = 0;
} else if (strcmp(value, EVENT_NATIVEMEM) == 0) {
if (_nativemem < 0) _nativemem = 0;
} else if (strcmp(value, EVENT_LOCK) == 0) {
if (_lock < 0) _lock = DEFAULT_LOCK_INTERVAL;
} else if (strcmp(value, EVENT_NATIVELOCK) == 0) {
if (_nativelock < 0) _nativelock = DEFAULT_LOCK_INTERVAL;
} else if (_event != NULL && !_all) {
msg = "Duplicate event argument";
} else {
_event = value;
}
CASE("timeout")
if (value == NULL || (_timeout = parseTimeout(value)) == -1) {
msg = "Invalid timeout";
}
CASE("loop")
if (value == NULL || (_loop = parseTimeout(value)) == -1) {
msg = "Invalid loop duration";
}
CASE("memlimit")
_mem_limit = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("alloc")
_alloc = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("tlab")
_tlab = true;
CASE("nativemem")
_nativemem = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("nofree")
_nofree = true;
CASE("trace")
_trace.push_back(value);
CASE("lock")
_lock = value == NULL ? DEFAULT_LOCK_INTERVAL : parseUnits(value, NANOS);
CASE("nativelock")
_nativelock = value == NULL ? DEFAULT_LOCK_INTERVAL : parseUnits(value, NANOS);
CASE("wall")
_wall = value == NULL ? 0 : parseUnits(value, NANOS);
CASE("proc")
_proc = value == NULL ? DEFAULT_PROC_INTERVAL : parseUnits(value, SECONDS);
CASE("cpu")
if (_event != NULL) {
msg = "Duplicate event argument";
} else {
_event = EVENT_CPU;
}
CASE("all")
_all = true;
_live = true;
if (_wall < 0) {
_wall = 0;
}
if (_alloc < 0) {
_alloc = 0;
}
if (_lock < 0) {
_lock = DEFAULT_LOCK_INTERVAL;
}
if (_nativelock < 0) {
_nativelock = DEFAULT_LOCK_INTERVAL;
}
if (_nativemem < 0) {
_nativemem = DEFAULT_ALLOC_INTERVAL;
}
if (_proc < 0 && OS::isLinux()) {
_proc = DEFAULT_PROC_INTERVAL;
}
if (_event == NULL && OS::isLinux()) {
_event = EVENT_CPU;
}
CASE("interval")
if (value == NULL || (_interval = parseUnits(value, UNIVERSAL)) <= 0) {
msg = "Invalid interval";
}
CASE("jstackdepth")
if (value == NULL || (_jstackdepth = atoi(value)) <= 0) {
msg = "jstackdepth must be > 0";
} else {
char* slash = strchr(value, '/');
_truncated_stack_depth = slash != NULL ? atoi(slash + 1) : _jstackdepth;
}
CASE("signal")
if (value == NULL || (_signal = atoi(value)) <= 0) {
msg = "signal must be > 0";
} else if ((value = strchr(value, '/')) != NULL) {
// Two signals were specified: one for CPU profiling, another for wall clock
_signal |= atoi(value + 1) << 8;
}
CASE("features")
if (value != NULL) {
if (strstr(value, "stats")) _features.stats = 1;
if (strstr(value, "jnienv")) _features.jnienv = 1;
if (strstr(value, "agct")) _features.agct = 1;
if (strstr(value, "mixed")) _features.mixed = 1;
if (strstr(value, "vtable")) _features.vtable_target = 1;
if (strstr(value, "comptask")) _features.comp_task = 1;
if (strstr(value, "pcaddr")) _features.pc_addr = 1;
}
CASE("file")
if (value == NULL || value[0] == 0) {
msg = "file must not be empty";
}
_file = value;
CASE("log")
_log = value == NULL || value[0] == 0 ? NULL : value;
CASE("loglevel")
if (value == NULL || value[0] == 0) {
msg = "loglevel must not be empty";
}
_loglevel = value;
CASE("quiet")
_quiet = true;
CASE("server")
if (value == NULL || value[0] == 0) {
msg = "server address must not be empty";
}
_server = value;
CASE("fdtransfer")
_fdtransfer = true;
if (value == NULL || value[0] == 0) {
msg = "fdtransfer path must not be empty";
}
_fdtransfer_path = value;
// Filters
CASE("filter")
_filter = value == NULL ? "" : value;
CASE("include")
_include.push_back(value);
CASE("exclude")
_exclude.push_back(value);
CASE("threads")
_threads = true;
CASE("sched")
_sched = true;
CASE("record-cpu")
_record_cpu = true;
CASE("live")
_live = true;
CASE("nobatch")
_nobatch = true;
CASE("alluser")
_alluser = true;
CASE("cstack")
if (value != NULL) {
if (strcmp(value, "fp") == 0) {
_cstack = CSTACK_FP;
} else if (strcmp(value, "dwarf") == 0) {
_cstack = CSTACK_DWARF;
} else if (strcmp(value, "vm") == 0) {
_cstack = CSTACK_VM;
} else if (strcmp(value, "vmx") == 0) {
// cstack=vmx is a shorthand for cstack=vm,features=mixed
_cstack = CSTACK_VM;
_features.mixed = 1;
} else {
_cstack = CSTACK_NO;
}
}
CASE("clock")
if (value != NULL) {
if (value[0] == 't') {
_clock = CLK_TSC;
} else if (value[0] == 'm') {
_clock = CLK_MONOTONIC;
}
}
CASE("target-cpu")
if (value == NULL || (_target_cpu = atoi(value)) < 0) {
_target_cpu = -1;
}
// Output style modifiers
CASE("simple")
_style |= STYLE_SIMPLE;
CASE("dot")
_style |= STYLE_DOTTED;
CASE("norm")
_style |= STYLE_NORMALIZE;
CASE("sig")
_style |= STYLE_SIGNATURES;
CASE("ann")
_style |= STYLE_ANNOTATE;
CASE("lib")
_style |= STYLE_LIB_NAMES;
CASE("mcache")
_mcache = value == NULL ? 1 : (unsigned char)strtol(value, NULL, 0);
CASE("begin")
_begin = value;
CASE("end")
_end = value;
CASE("nostop")
_nostop = true;
CASE("ttsp")
if (_begin != NULL || _end != NULL) {
msg = "begin and end must both be empty when ttsp is set";
}
_begin = "SafepointSynchronize::begin";
_end = "RuntimeService::record_safepoint_synchronized";
// FlameGraph options
CASE("title")
_title = value;
CASE("minwidth")
if (value != NULL) _minwidth = atof(value);
CASE("reverse")
_reverse = true;
CASE("inverted")
_inverted = true;
DEFAULT()
if (_unknown_arg == NULL) _unknown_arg = arg;
}
}
if (_file != NULL && strchr(_file, '%') != NULL) {
_file = expandFilePattern(_buf + len + 1, EXTRA_BUF_SIZE - 1, _file);
// Return error only after parsing all arguments, when 'log' is already set
if (msg != NULL) {
return Error(msg);
}
if (dumpRequested() && (_action == ACTION_NONE || _action == ACTION_STOP)) {
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0 && _nativemem < 0 && _nativelock < 0 && _trace.empty()) {
_event = EVENT_CPU;
}
if (_file != NULL && _output == OUTPUT_NONE) {
_output = detectOutputFormat(_file);
if (_output == OUTPUT_SVG) {
return Error("SVG format is obsolete, use .html for FlameGraph");
}
_dump_traces = 100;
_dump_flat = 200;
}
if (_action == ACTION_NONE && _output != OUTPUT_NONE) {
_action = ACTION_DUMP;
}
return Error::OK;
}
// Expands %p to the process id
// %t to the timestamp
const char* Arguments::expandFilePattern(char* dest, size_t max_size, const char* pattern) {
char* ptr = dest;
char* end = dest + max_size - 1;
// Returns the output file name. A name that contains '%' is treated as a pattern
// and is expanded by expandFilePattern(); otherwise _file is returned unchanged
// (including NULL when no file is set).
const char* Arguments::file() {
    if (_file == NULL || strchr(_file, '%') == NULL) {
        return _file;
    }
    return expandFilePattern(_file);
}
// Returns true if the log file is a temporary file of asprof launcher,
// i.e. the log path starts with "/tmp/asprof-log."
bool Arguments::hasTemporaryLog() const {
    static const char TEMP_LOG_PREFIX[] = "/tmp/asprof-log.";
    if (_log == NULL) {
        return false;
    }
    // sizeof includes the terminating zero, hence - 1
    return strncmp(_log, TEMP_LOG_PREFIX, sizeof(TEMP_LOG_PREFIX) - 1) == 0;
}
// Computes the hash code of an argument name at run time.
// Should match statically computed HASH(arg): each character contributes
// its 5 low bits, the character at index i being shifted left by 5 * i.
//
// HASH() covers at most 12 characters (shifts 0..55), so its values always fit
// into the low 60 bits. An argument longer than 12 characters cannot be equal to
// any HASH() value; for such input -1 is returned instead of shifting by 60 bits
// or more, which would overflow the signed type or exceed its width
// (undefined behavior).
long long Arguments::hash(const char* arg) {
    unsigned long long h = 0;
    for (int shift = 0; *arg != 0; shift += 5) {
        if (shift >= 60) {
            // More than 12 characters: guaranteed mismatch with every CASE
            return -1;
        }
        h |= (unsigned long long)(*arg++ & 31) << shift;
    }
    return (long long)h;
}
// Expands the following patterns:
// %p process id
// %t timestamp (yyyyMMdd-hhmmss)
// %n{MAX} sequence number
// %{ENV} environment variable
const char* Arguments::expandFilePattern(const char* pattern) {
char* ptr = _buf;
char* end = _buf + EXTRA_BUF_SIZE - 1;
while (ptr < end && *pattern != 0) {
char c = *pattern++;
@@ -195,41 +480,97 @@ const char* Arguments::expandFilePattern(char* dest, size_t max_size, const char
t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
t.tm_hour, t.tm_min, t.tm_sec);
continue;
} else if (c == 'n') {
unsigned int max_files = 0;
const char* p;
if (*pattern == '{' && (p = strchr(pattern, '}')) != NULL) {
max_files = atoi(pattern + 1);
pattern = p + 1;
}
ptr += snprintf(ptr, end - ptr, "%u", max_files > 0 ? _file_num % max_files : _file_num);
continue;
} else if (c == '{') {
char env_key[128];
const char* p = strchr(pattern, '}');
if (p != NULL && p - pattern < sizeof(env_key)) {
memcpy(env_key, pattern, p - pattern);
env_key[p - pattern] = 0;
const char* env_value = getenv(env_key);
if (env_value != NULL) {
ptr += snprintf(ptr, end - ptr, "%s", env_value);
pattern = p + 1;
continue;
}
}
}
}
*ptr++ = c;
}
*ptr = 0;
return dest;
*(ptr < end ? ptr : end) = 0;
return _buf;
}
long Arguments::parseUnits(const char* str) {
// Chooses the output format by the extension of the given file name,
// i.e. the part starting at the last '.' character.
// File names without an extension or with an unrecognized one map to OUTPUT_TEXT.
Output Arguments::detectOutputFormat(const char* file) {
    const char* ext = strrchr(file, '.');
    if (ext == NULL) {
        return OUTPUT_TEXT;
    }

    if (strcmp(ext, ".html") == 0) {
        return OUTPUT_FLAMEGRAPH;
    }
    if (strcmp(ext, ".jfr") == 0) {
        return OUTPUT_JFR;
    }
    if (strcmp(ext, ".collapsed") == 0 || strcmp(ext, ".folded") == 0) {
        return OUTPUT_COLLAPSED;
    }
    if (strcmp(ext, ".svg") == 0) {
        return OUTPUT_SVG;
    }
    return OUTPUT_TEXT;
}
long Arguments::parseUnits(const char* str, const Multiplier* multipliers) {
char* end;
long result = strtol(str, &end, 0);
if (end == str) {
return -1;
}
if (*end) {
switch (*end) {
case 'K': case 'k':
case 'U': case 'u': // microseconds
return result * 1000;
case 'M': case 'm': // million, megabytes or milliseconds
return result * 1000000;
case 'G': case 'g':
case 'S': case 's': // seconds
return result * 1000000000;
char c = *end;
if (c == 0) {
return result;
}
if (c >= 'A' && c <= 'Z') {
c += 'a' - 'A';
}
for (const Multiplier* m = multipliers; m->symbol; m++) {
if (c == m->symbol) {
return result * m->multiplier;
}
}
return result;
return -1;
}
// Parses a timeout argument.
// A string without ':' is parsed by parseUnits() with the SECONDS multipliers.
// Otherwise the string is read as hh:mm[:ss] and the result is packed as
//   0xff000000 | hh << 16 | mm << 8 | ss
// where a field is set to 0xff when its first character is not an accepted digit
// ('0'..'2' for hours, '0'..'5' for minutes and seconds) or, for seconds,
// when the second ':' is missing.
int Arguments::parseTimeout(const char* str) {
    const char* colon = strchr(str, ':');
    if (colon == NULL) {
        return parseUnits(str, SECONDS);
    }

    int hh = 0xff;
    if (str[0] >= '0' && str[0] <= '2') {
        hh = atoi(str);
    }

    int mm = 0xff;
    if (colon[1] >= '0' && colon[1] <= '5') {
        mm = atoi(colon + 1);
    }

    int ss = 0xff;
    const char* second_colon = strchr(colon + 1, ':');
    if (second_colon != NULL && second_colon[1] >= '0' && second_colon[1] <= '5') {
        ss = atoi(second_colon + 1);
    }

    return 0xff000000 | hh << 16 | mm << 8 | ss;
}
Arguments::~Arguments() {
free(_buf);
if (!_shared) free(_buf);
}
void Arguments::assign(Arguments& other) {
free(_buf);
*this = other;
other._buf = NULL;
// Copies this Arguments object into _global_args.
// Does nothing when called on _global_args itself.
void Arguments::save() {
if (this != &_global_args) {
// Release the buffer held by the previously saved arguments
free(_global_args._buf);
_global_args = *this;
// NOTE(review): _shared is set after the copy, so only this object is flagged;
// presumably this keeps the destructor of this object from freeing _buf,
// which is now also referenced by _global_args - confirm
_shared = true;
}
}

View File

@@ -1,63 +1,130 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _ARGUMENTS_H
#define _ARGUMENTS_H
#include <stddef.h>
#include <vector>
const long DEFAULT_INTERVAL = 10000000; // 10 ms
const int DEFAULT_FRAMEBUF = 1000000;
const long DEFAULT_INTERVAL = 10000000; // 10 ms
const long DEFAULT_ALLOC_INTERVAL = 524287; // 512 KiB
const long DEFAULT_LOCK_INTERVAL = 10000; // 10 us
const long DEFAULT_PROC_INTERVAL = 30; // 30 seconds
const int DEFAULT_JSTACKDEPTH = 2048;
const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_LOCK = "lock";
const char* const EVENT_WALL = "wall";
const char* const EVENT_ITIMER = "itimer";
const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_NATIVEMEM = "nativemem";
const char* const EVENT_LOCK = "lock";
const char* const EVENT_NATIVELOCK = "nativelock";
const char* const EVENT_WALL = "wall";
const char* const EVENT_CTIMER = "ctimer";
const char* const EVENT_ITIMER = "itimer";
enum Action {
#define SHORT_ENUM __attribute__((__packed__))
enum SHORT_ENUM Action {
ACTION_NONE,
ACTION_START,
ACTION_RESUME,
ACTION_STOP,
ACTION_DUMP,
ACTION_STATUS,
ACTION_METRICS,
ACTION_LIST,
ACTION_VERSION,
ACTION_DUMP
ACTION_VERSION
};
enum Counter {
enum SHORT_ENUM Counter {
COUNTER_SAMPLES,
COUNTER_TOTAL
};
enum Ring {
RING_ANY,
RING_KERNEL,
RING_USER
};
enum Style {
STYLE_SIMPLE = 1,
STYLE_DOTTED = 2,
STYLE_SIGNATURES = 4,
STYLE_ANNOTATE = 8
STYLE_SIMPLE = 0x1,
STYLE_DOTTED = 0x2,
STYLE_NORMALIZE = 0x4,
STYLE_SIGNATURES = 0x8,
STYLE_ANNOTATE = 0x10,
STYLE_LIB_NAMES = 0x20,
STYLE_NO_SEMICOLON = 0x40
};
// Whenever enum changes, update SETTING_CSTACK in FlightRecorder
enum SHORT_ENUM CStack {
CSTACK_DEFAULT, // use perf_event_open stack if available or Frame Pointer links otherwise
CSTACK_NO, // do not collect native frames
CSTACK_FP, // walk stack using Frame Pointer links
CSTACK_DWARF, // use DWARF unwinding info from .eh_frame section
CSTACK_VM // unwind using HotSpot VMStructs
};
enum SHORT_ENUM Clock {
CLK_DEFAULT,
CLK_TSC,
CLK_MONOTONIC
};
enum SHORT_ENUM Output {
OUTPUT_NONE,
OUTPUT_TEXT,
OUTPUT_SVG, // obsolete
OUTPUT_COLLAPSED,
OUTPUT_FLAMEGRAPH,
OUTPUT_TREE,
OUTPUT_JFR,
OUTPUT_OTLP
};
enum JfrOption {
NO_SYSTEM_INFO = 0x1,
NO_SYSTEM_PROPS = 0x2,
NO_NATIVE_LIBS = 0x4,
NO_CPU_LOAD = 0x8,
NO_HEAP_SUMMARY = 0x10,
IN_MEMORY = 0x100,
JFR_SYNC_OPTS = NO_SYSTEM_INFO | NO_SYSTEM_PROPS | NO_NATIVE_LIBS | NO_CPU_LOAD | NO_HEAP_SUMMARY
};
// Keep this in sync with JfrSync.java
enum EventMask {
EM_CPU = 1,
EM_ALLOC = 2,
EM_LOCK = 4,
EM_WALL = 8,
EM_NATIVEMEM = 16,
EM_NATIVELOCK = 32,
EM_METHOD_TRACE = 64
};
constexpr int EVENT_MASK_SIZE = 7;
struct StackWalkFeatures {
unsigned short stats : 1; // collect stack walking duration statistics
unsigned short jnienv : 1; // verify JNIEnv* obtained using VMStructs
unsigned short agct : 1; // force usage of AsyncGetCallTrace instead of VMStructs
unsigned short mixed : 1; // mixed stack traces with Java and native frames interleaved
unsigned short vtable_target : 1; // show receiver classes of vtable/itable stubs
unsigned short comp_task : 1; // display current compilation task for JIT threads
unsigned short pc_addr : 1; // record exact PC address for each sample
unsigned short _padding : 9; // pad structure to 16 bits
};
// One entry of a unit table: a suffix character and the factor it stands for.
struct Multiplier {
char symbol;
long multiplier;
};
// Unit tables passed to Arguments::parseUnits(). Symbols are lower case, and every table
// ends with a {0, 0} entry that serves as the terminator.
// NOTE(review): the table names suggest base units of nanoseconds, bytes and seconds
// respectively; UNIVERSAL mixes time and size suffixes, with 'm' meaning 1000000 — confirm.
constexpr Multiplier NANOS[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {0, 0}};
constexpr Multiplier BYTES[] = {{'b', 1}, {'k', 1024}, {'m', 1048576}, {'g', 1073741824}, {0, 0}};
constexpr Multiplier SECONDS[] = {{'s', 1}, {'m', 60}, {'h', 3600}, {'d', 86400}, {0, 0}};
constexpr Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {'b', 1}, {'k', 1024}, {'g', 1073741824}, {0, 0}};
class Error {
private:
@@ -82,70 +149,172 @@ class Error {
class Arguments {
private:
char* _buf;
bool _shared;
const char* expandFilePattern(char* dest, size_t max_size, const char* pattern);
long parseUnits(const char* str);
const char* expandFilePattern(const char* pattern);
static long long hash(const char* arg);
static Output detectOutputFormat(const char* file);
static int parseTimeout(const char* str);
public:
Action _action;
Counter _counter;
Ring _ring;
const char* _event;
std::vector<const char*> _trace;
int _timeout;
int _loop;
size_t _mem_limit;
long _interval;
int _jstackdepth;
int _framebuf;
bool _threads;
int _style;
long _alloc;
long _nativemem;
long _lock;
long _nativelock;
long _wall;
long _proc;
bool _all;
int _jstackdepth;
int _truncated_stack_depth;
int _signal;
const char* _file;
bool _dump_collapsed;
bool _dump_flamegraph;
bool _dump_tree;
bool _dump_jfr;
bool _dump_summary;
const char* _log;
const char* _loglevel;
const char* _unknown_arg;
const char* _server;
const char* _filter;
std::vector<const char*> _include;
std::vector<const char*> _exclude;
unsigned char _mcache;
bool _preloaded;
bool _quiet;
bool _threads;
bool _sched;
bool _record_cpu;
bool _tlab;
bool _live;
bool _nofree;
bool _nobatch;
bool _nostop;
bool _alluser;
bool _fdtransfer;
const char* _fdtransfer_path;
int _target_cpu;
int _style;
StackWalkFeatures _features;
CStack _cstack;
Clock _clock;
Output _output;
long _chunk_size;
long _chunk_time;
const char* _jfr_sync;
int _jfr_options;
int _dump_traces;
int _dump_flat;
unsigned int _file_num;
const char* _begin;
const char* _end;
// FlameGraph parameters
const char* _title;
int _width;
int _height;
double _minwidth;
bool _reverse;
bool _inverted;
Arguments() :
_buf(NULL),
_shared(false),
_action(ACTION_NONE),
_counter(COUNTER_SAMPLES),
_ring(RING_ANY),
_event(EVENT_CPU),
_event(NULL),
_trace(),
_timeout(0),
_loop(0),
_mem_limit(0),
_interval(0),
_alloc(-1),
_nativemem(-1),
_lock(-1),
_nativelock(-1),
_wall(-1),
_proc(-1),
_all(false),
_jstackdepth(DEFAULT_JSTACKDEPTH),
_framebuf(DEFAULT_FRAMEBUF),
_threads(false),
_style(0),
_truncated_stack_depth(DEFAULT_JSTACKDEPTH),
_signal(0),
_file(NULL),
_dump_collapsed(false),
_dump_flamegraph(false),
_dump_tree(false),
_dump_jfr(false),
_dump_summary(false),
_log(NULL),
_loglevel(NULL),
_unknown_arg(NULL),
_server(NULL),
_filter(NULL),
_include(),
_exclude(),
_mcache(0),
_preloaded(false),
_quiet(false),
_threads(false),
_sched(false),
_record_cpu(false),
_tlab(false),
_live(false),
_nofree(false),
_nobatch(false),
_nostop(false),
_alluser(false),
_fdtransfer(false),
_fdtransfer_path(NULL),
_target_cpu(-1),
_style(0),
_features{},
_cstack(CSTACK_DEFAULT),
_clock(CLK_DEFAULT),
_output(OUTPUT_NONE),
_chunk_size(100 * 1024 * 1024),
_chunk_time(3600),
_jfr_sync(NULL),
_jfr_options(0),
_dump_traces(0),
_dump_flat(0),
_title("Flame Graph"),
_width(1200),
_height(16),
_minwidth(0.25),
_reverse(false) {
_file_num(0),
_begin(NULL),
_end(NULL),
_title(NULL),
_minwidth(0),
_reverse(false),
_inverted(false) {
}
~Arguments();
void assign(Arguments& other);
void save();
Error parse(const char* args);
bool dumpRequested() {
return _dump_collapsed || _dump_flamegraph || _dump_tree || _dump_jfr || _dump_summary || _dump_traces > 0 || _dump_flat > 0;
const char* file();
bool hasTemporaryLog() const;
// Tells whether the command has an output file set that applies to the current action.
// Requires _file to be set. For ACTION_STOP and ACTION_DUMP the answer is true unless
// the output format is JFR; for any other action it is true only when the action is
// ACTION_STATUS or a later value of the Action enum.
bool hasOutputFile() const {
return _file != NULL &&
(_action == ACTION_STOP || _action == ACTION_DUMP ? _output != OUTPUT_JFR : _action >= ACTION_STATUS);
}
// Returns true if any bit of the given JfrOption is set in _jfr_options.
bool hasOption(JfrOption option) const {
return (_jfr_options & option) != 0;
}
// Builds a bit set of EventMask flags for the events enabled by these arguments:
// an event name enables EM_CPU, a non-negative interval enables the matching
// interval-based event, and a non-empty trace list enables EM_METHOD_TRACE.
int eventMask() const {
    int mask = 0;
    if (_event != NULL)   mask |= EM_CPU;
    if (_alloc >= 0)      mask |= EM_ALLOC;
    if (_lock >= 0)       mask |= EM_LOCK;
    if (_wall >= 0)       mask |= EM_WALL;
    if (_nativemem >= 0)  mask |= EM_NATIVEMEM;
    if (_nativelock >= 0) mask |= EM_NATIVELOCK;
    if (!_trace.empty())  mask |= EM_METHOD_TRACE;
    return mask;
}
static long parseUnits(const char* str, const Multiplier* multipliers);
};
extern Arguments _global_args;
#endif // _ARGUMENTS_H

77
src/asprof.cpp Normal file
View File

@@ -0,0 +1,77 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include "asprof.h"
#include "hooks.h"
#include "profiler.h"
#include "tsc.h"
#include "threadLocalData.h"
#include "userEvents.h"
// Wraps a message string as an asprof_error_t; the "error code" is the message pointer itself.
static asprof_error_t asprof_error(const char* msg) {
return (asprof_error_t)msg;
}
// API entry point for one-time initialization; delegates to Hooks::init(true).
// NOTE(review): the meaning of the boolean argument is defined by Hooks and is not visible here.
DLLEXPORT void asprof_init() {
Hooks::init(true);
}
// Returns the message for an error code. Since asprof_error_t is already a C string,
// the value is returned unchanged (NULL stays NULL).
DLLEXPORT const char* asprof_error_str(asprof_error_t err) {
return err;
}
// Parses and runs a profiler command.
// Output goes to the file named in the command when Arguments::hasOutputFile() says so,
// otherwise it is streamed through output_callback.
// Returns NULL on success or an error message on failure.
DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t output_callback) {
    Arguments args;
    Error error = args.parse(command);
    if (error) {
        return asprof_error(error.message());
    }

    Log::open(args);

    if (args.hasOutputFile()) {
        FileWriter out(args.file());
        if (!out.is_open()) {
            return asprof_error("Could not open output file");
        }
        error = Profiler::instance()->runInternal(args, out);
    } else {
        CallbackWriter out(output_callback);
        error = Profiler::instance()->runInternal(args, out);
    }

    if (error) {
        return asprof_error(error.message());
    }
    return NULL;
}
// API entry point that forwards to ThreadLocalData::getThreadLocalData() and returns its result.
DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void) {
return ThreadLocalData::getThreadLocalData();
}
// API entry point that forwards the event name to UserEvents::registerEvent() and returns the resulting key.
DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name) {
return UserEvents::registerEvent(name);
}
// Two-level stringification: the # operator does not macro-expand its operand, so
// asprof_str(ASPROF_MAX_JFR_EVENT_LENGTH) alone would yield the macro's *name*.
// asprof_xstr() expands the argument first and then stringifies the resulting value.
#define asprof_str(s) #s
#define asprof_xstr(s) asprof_str(s)

// Emits a user-defined JFR event of the given type with `len` bytes of payload.
// The event is timestamped with the current TSC tick count and handed to the profiler
// as a USER_EVENT.
// Returns NULL on success, or an error message if the payload is longer than
// ASPROF_MAX_JFR_EVENT_LENGTH bytes.
DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len) {
    if (len > ASPROF_MAX_JFR_EVENT_LENGTH) {
        return asprof_error("Unable to emit JFR event larger than " asprof_xstr(ASPROF_MAX_JFR_EVENT_LENGTH) " bytes");
    }

    UserEvent event;
    event._start_time = TSC::ticks();
    event._type = type;
    event._data = data;
    event._len = len;
    Profiler::instance()->recordEventOnly(USER_EVENT, &event);
    return NULL;
}

106
src/asprof.h Normal file
View File

@@ -0,0 +1,106 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _ASPROF_H
#define _ASPROF_H
#include <stddef.h>
#include <stdint.h>
#ifdef __clang__
# define DLLEXPORT __attribute__((visibility("default")))
#else
# define DLLEXPORT __attribute__((visibility("default"),externally_visible))
#endif
#define WEAK __attribute__((weak))
#ifdef __cplusplus
extern "C" {
#endif
typedef const char* asprof_error_t;
typedef void (*asprof_writer_t)(const char* buf, size_t size);
// Should be called once prior to any other API functions
DLLEXPORT void asprof_init();
typedef void (*asprof_init_t)();
// Returns an error message for the given error code or NULL if there is no error
DLLEXPORT const char* asprof_error_str(asprof_error_t err);
typedef const char* (*asprof_error_str_t)(asprof_error_t err);
// Executes async-profiler command using output_callback as an optional sink
// for the profiler output. Returns an error code or NULL on success.
DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t output_callback);
typedef asprof_error_t (*asprof_execute_t)(const char* command, asprof_writer_t output_callback);
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
typedef struct {
// A thread-local sample counter, which increments (not necessarily by 1) every time a
// stack profiling sample is taken using a profiling signal.
//
// The counter might be initialized lazily, only starting counting from 0 the first time
// `asprof_get_thread_local_data` is called on a given thread. Further calls to
// `asprof_get_thread_local_data` on a given thread will of course not reset the counter.
volatile uint64_t sample_counter;
} asprof_thread_local_data;
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
//
// Gets a pointer to asprof's thread-local data structure, see `asprof_thread_local_data`'s
// documentation for the details of each field. This function might lazily initialize that
// structure.
//
// This function can return NULL either if the profiler is not yet initialized, or in
// case of an allocation failure.
//
// This function is *not* async-signal-safe. However, it is safe to call concurrently
// with async-profiler operations, including initialization.
DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void);
typedef asprof_thread_local_data* (*asprof_get_thread_local_data_t)(void);
typedef int asprof_jfr_event_key;
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
//
// Returns an asprof_jfr_event_key identifier for a user-defined JFR key.
// That identifier can then be used in `asprof_emit_jfr_event`.
//
// The name is required to be valid (since it's a C string, NUL-free) UTF-8.
//
// Returns -1 on failure.
DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name);
typedef asprof_jfr_event_key (*asprof_register_jfr_event_t)(const char* name);
#define ASPROF_MAX_JFR_EVENT_LENGTH 2048
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
//
// Emits a custom, user-defined JFR event. The key should be created via `asprof_register_jfr_event`.
// The data can be arbitrary binary data, with size <= ASPROF_MAX_JFR_EVENT_LENGTH.
//
// User-defined events are included in the JFR under a `profiler.UserEvent` event type. That type will contain
// (at least) the following fields:
// 1. `startTime` [Long] - the emitted event's time in ticks.
// 2. `eventThread` [java.lang.Thread] - the thread that emitted the events.
// 3. `type` [profiler.types.UserEventType] - the event's type,
// where `profiler.types.UserEventType` is an indexed string from the JFR constant pool.
// 4. `data` [String] - the event data. This is the Latin-1 encoded version of the inputted data.
// The Latin-1 encoding is used as a way to stuff the arbitrary byte input into something
// that JFR supports (JFR technically supports byte arrays, but `jfr print` doesn't).
//
// Returns an error code or NULL on success.
DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len);
typedef asprof_error_t (*asprof_emit_jfr_event_t)(asprof_jfr_event_key type, const uint8_t* data, size_t len);
#ifdef __cplusplus
}
#endif
#endif // _ASPROF_H

323
src/callTraceStorage.cpp Normal file
View File

@@ -0,0 +1,323 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <string.h>
#include "callTraceStorage.h"
#include "os.h"
#define COMMA ,
static const u32 INITIAL_CAPACITY = 65536;
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
static const size_t MEM_LIMIT_EXTRA = 0x10000; // reserve up to 64 KB for LongHashTable headers
// One generation of the call trace hash table.
// The object is only a header: a single allocation holds the header, then `capacity`
// u64 keys, then `capacity` CallTraceSample values (see getSize(), keys() and values()).
// Tables are chained through _prev, newest first.
class LongHashTable {
private:
LongHashTable* _prev;
void* _padding0;
u32 _capacity;
// NOTE(review): the padding arrays presumably keep _capacity and the concurrently
// updated _size on separate cache lines — confirm.
u32 _padding1[15];
volatile u32 _size;
u32 _padding2[15];
// Size in bytes of the whole allocation for the given capacity, rounded up using OS::page_mask
static size_t getSize(u32 capacity) {
size_t size = sizeof(LongHashTable) + (sizeof(u64) + sizeof(CallTraceSample)) * capacity;
return (size + OS::page_mask) & ~OS::page_mask;
}
public:
// Allocates a table with the given capacity linked to `prev`; returns NULL if OS::safeAlloc fails.
// Keys and values are not explicitly initialized here.
static LongHashTable* allocate(LongHashTable* prev, u32 capacity) {
LongHashTable* table = (LongHashTable*)OS::safeAlloc(getSize(capacity));
if (table != NULL) {
table->_prev = prev;
table->_capacity = capacity;
table->_size = 0;
}
return table;
}
// Releases this table and returns the previous one in the chain
LongHashTable* destroy() {
LongHashTable* prev = _prev;
OS::safeFree(this, getSize(_capacity));
return prev;
}
// Bytes occupied by this table, including keys and values
size_t usedMemory() {
return getSize(_capacity);
}
LongHashTable* prev() {
return _prev;
}
u32 capacity() {
return _capacity;
}
u32 size() {
return _size;
}
// Atomically increments the number of occupied slots and returns the new value
u32 incSize() {
return __sync_add_and_fetch(&_size, 1);
}
// Keys start right after the header
u64* keys() {
return (u64*)(this + 1);
}
// Values start right after the keys
CallTraceSample* values() {
return (CallTraceSample*)(keys() + _capacity);
}
// Zeroes all keys and values and resets the occupied slot count
void clear() {
memset(keys(), 0, (sizeof(u64) + sizeof(CallTraceSample)) * _capacity);
_size = 0;
}
};
CallTrace CallTraceStorage::_overflow_trace = {1, {BCI_ERROR, LP64_ONLY(0 COMMA) (jmethodID)"storage_overflow"}};
// Creates the storage with one table of INITIAL_CAPACITY slots and no memory limit.
// NOTE(review): LongHashTable::allocate() can return NULL, but its result is dereferenced
// here without a check — confirm that a failed initial allocation cannot happen or is acceptable.
CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK) {
_current_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);
_used_memory = _current_table->usedMemory();
_mem_limit = SIZE_MAX;
_overflow = 0;
}
// Releases every table in the chain, newest first.
CallTraceStorage::~CallTraceStorage() {
    LongHashTable* table = _current_table;
    while (table != NULL) {
        // destroy() frees the table and hands back its predecessor
        table = table->destroy();
    }
    _current_table = NULL;
}
// Drops all collected call traces and prepares the storage for a new session.
// Every table except the oldest one is released; the remaining table and the trace
// allocator are emptied, and the overflow counter is reset.
//
// mem_limit - requested cap on call trace memory in bytes; 0 means unlimited.
//             MEM_LIMIT_EXTRA is added on top to leave room for LongHashTable headers.
void CallTraceStorage::clear(size_t mem_limit) {
    while (_current_table->prev() != NULL) {
        _current_table = _current_table->destroy();
    }
    _current_table->clear();
    _used_memory = _current_table->usedMemory();
    _allocator.clear();

    // The reserve must be added, not OR-ed: a bitwise OR grants no extra space at all
    // whenever the requested limit already has the 0x10000 bit set.
    if (mem_limit == 0 || mem_limit > SIZE_MAX - MEM_LIMIT_EXTRA) {
        // No limit requested, or the sum would overflow: treat as unlimited
        _mem_limit = SIZE_MAX;
    } else {
        _mem_limit = mem_limit + MEM_LIMIT_EXTRA;
    }
    _overflow = 0;
}
// Total number of slots across all tables in the chain.
u32 CallTraceStorage::capacity() {
    // Each new table is twice as large as the previous one, so the total is the sum
    // of a geometric series (64K + 128K + 256K...) ending at the newest table.
    u32 newest_capacity = _current_table->capacity();
    return newest_capacity * 2 - INITIAL_CAPACITY;
}
// Memory in bytes currently accounted to hash tables plus the memory used by the trace allocator.
size_t CallTraceStorage::usedMemory() {
return _used_memory + _allocator.usedMemory();
}
// Fills `map` with call_trace_id -> CallTrace for every slot that has recorded samples,
// walking all tables from newest to oldest. The sample count of each visited slot is
// reset. If any trace was dropped due to overflow, the synthetic overflow trace is
// added under OVERFLOW_TRACE_ID.
void CallTraceStorage::collectTraces(std::map<u32, CallTrace*>& map) {
    for (LongHashTable* table = _current_table; table != NULL; table = table->prev()) {
        u64* keys = table->keys();
        CallTraceSample* values = table->values();
        const u32 capacity = table->capacity();
        // IDs of this table start here; this matches the value returned by put()
        const u32 first_id = capacity - (INITIAL_CAPACITY - 1);

        for (u32 slot = 0; slot < capacity; slot++) {
            if (keys[slot] == 0) {
                continue;
            }
            CallTraceSample& sample = values[slot];
            if (loadAcquire(sample.samples) == 0) {
                continue;
            }

            // Reset samples to avoid duplication of call traces between JFR chunks
            sample.samples = 0;
            CallTrace* trace = sample.acquireTrace();
            if (trace != NULL) {
                map[first_id + slot] = trace;
            }
        }
    }

    if (_overflow > 0) {
        map[OVERFLOW_TRACE_ID] = &_overflow_trace;
    }
}
// Appends a pointer to the sample of every occupied slot, newest table first.
void CallTraceStorage::collectSamples(std::vector<CallTraceSample*>& samples) {
    LongHashTable* table = _current_table;
    while (table != NULL) {
        u64* keys = table->keys();
        CallTraceSample* values = table->values();
        const u32 capacity = table->capacity();

        for (u32 slot = 0; slot < capacity; slot++) {
            if (keys[slot] == 0) {
                continue;  // empty slot
            }
            samples.push_back(values + slot);
        }
        table = table->prev();
    }
}
void CallTraceStorage::collectSamples(std::map<u64, CallTraceSample>& map) {
for (LongHashTable* table = _current_table; table != NULL; table = table->prev()) {
u64* keys = table->keys();
CallTraceSample* values = table->values();
u32 capacity = table->capacity();
for (u32 slot = 0; slot < capacity; slot++) {
if (keys[slot] != 0 && values[slot].acquireTrace() != NULL) {
map[keys[slot]] += values[slot];
}
}
}
}
// Adaptation of MurmurHash64A by Austin Appleby.
// Hashes the raw bytes of the first num_frames entries of `frames` into a 64-bit value.
u64 CallTraceStorage::calcHash(int num_frames, ASGCT_CallFrame* frames) {
const u64 M = 0xc6a4a7935bd1e995ULL;
const int R = 47;
// Length of the input in bytes; it also seeds the hash
int len = num_frames * sizeof(ASGCT_CallFrame);
u64 h = len * M;
// Main loop consumes the input in 8-byte words
const u64* data = (const u64*)frames;
const u64* end = data + len / 8;
while (data != end) {
u64 k = *data++;
k *= M;
k ^= k >> R;
k *= M;
h ^= k;
h *= M;
}
// Mix in a trailing 4-byte word when the length has the 4 bit set;
// smaller remainders are not handled by this adaptation
if (len & 4) {
h ^= *(u32*)data;
h *= M;
}
// Final avalanche
h ^= h >> R;
h *= M;
h ^= h >> R;
return h;
}
// Copies the given frames into a CallTrace obtained from the linear allocator.
// Returns NULL if the allocation fails.
CallTrace* CallTraceStorage::storeCallTrace(int num_frames, ASGCT_CallFrame* frames) {
    // CallTrace already embeds one frame, so the header is the struct minus that frame
    const size_t header_size = sizeof(CallTrace) - sizeof(ASGCT_CallFrame);
    const size_t total_size = header_size + num_frames * sizeof(ASGCT_CallFrame);

    CallTrace* copy = (CallTrace*)_allocator.alloc(total_size);
    if (copy == NULL) {
        return NULL;
    }

    copy->num_frames = num_frames;
    // Do not use memcpy inside signal handler
    ASGCT_CallFrame* dst = copy->frames;
    for (int i = 0; i < num_frames; i++) {
        dst[i] = frames[i];
    }
    return copy;
}
// Looks up the trace stored under `hash` in a single table, using the same probing
// sequence as put(). Returns NULL if an empty slot is reached or the whole table
// has been probed without a match.
CallTrace* CallTraceStorage::findCallTrace(LongHashTable* table, u64 hash) {
    u64* keys = table->keys();
    const u32 capacity = table->capacity();
    const u32 mask = capacity - 1;

    u32 slot = hash & mask;
    for (u32 step = 0; keys[slot] != hash; slot = (slot + step) & mask) {
        // The step counter is only advanced when the slot is occupied by another hash
        if (keys[slot] == 0 || ++step >= capacity) {
            return NULL;
        }
    }

    return table->values()[slot].trace;
}
// Records a call trace and returns its ID.
// The trace is looked up by hash in the current table; if absent, a slot is claimed with
// a compare-and-swap and the frames are stored (or reused from the previous table).
// When `counter` is non-zero, the slot's sample count is incremented by one and its
// counter by `counter`.
// Returns OVERFLOW_TRACE_ID when the memory limit is exceeded or the table is full.
u32 CallTraceStorage::put(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
u64 hash = calcHash(num_frames, frames);
// Work on a snapshot of the current table; it may be replaced concurrently
LongHashTable* table = _current_table;
u64* keys = table->keys();
u32 capacity = table->capacity();
u32 slot = hash & (capacity - 1);
u32 step = 0;
while (keys[slot] != hash) {
if (keys[slot] == 0) {
if (usedMemory() > _mem_limit) {
// Stop adding new stack traces once memory limit is exceeded
atomicInc(_overflow);
return OVERFLOW_TRACE_ID;
}
// Another thread claimed the slot first: re-examine the same slot
if (!__sync_bool_compare_and_swap(&keys[slot], 0, hash)) {
continue;
}
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table.
// This condition can be hit only once per table, so the below allocation is race-free.
if (table->incSize() == capacity * 3 / 4) {
LongHashTable* new_table = LongHashTable::allocate(table, capacity * 2);
if (new_table != NULL) {
atomicInc(_used_memory, new_table->usedMemory());
storeRelease(_current_table, new_table);
}
}
// Migrate from a previous table to save space
CallTrace* trace = table->prev() == NULL ? NULL : findCallTrace(table->prev(), hash);
if (trace == NULL) {
trace = storeCallTrace(num_frames, frames);
}
// Publish the trace for readers that use acquireTrace()
table->values()[slot].setTrace(trace);
break;
}
if (++step >= capacity) {
// Very unlikely case of a table overflow
atomicInc(_overflow);
return OVERFLOW_TRACE_ID;
}
// Improved version of linear probing
slot = (slot + step) & (capacity - 1);
}
if (counter != 0) {
CallTraceSample& s = table->values()[slot];
atomicInc(s.samples);
atomicInc(s.counter, counter);
}
// The ID encodes both the table (through its capacity) and the slot within it
return capacity - (INITIAL_CAPACITY - 1) + slot;
}
// Adds `samples` and `counter` to the slot identified by a call trace ID previously
// returned by put(). IDs outside the current capacity are ignored.
void CallTraceStorage::add(u32 call_trace_id, u64 samples, u64 counter) {
    if (call_trace_id > capacity()) {  // this also covers call_trace_id == OVERFLOW_TRACE_ID
        return;
    }

    // Undo the offset applied by put(): the result is "table capacity + slot"
    const u32 index = call_trace_id + (INITIAL_CAPACITY - 1);

    // The owning table is the newest one whose capacity does not exceed the index
    LongHashTable* table = _current_table;
    while (table != NULL && index < table->capacity()) {
        table = table->prev();
    }

    if (table != NULL) {
        CallTraceSample& s = table->values()[index - table->capacity()];
        atomicInc(s.samples, samples);
        atomicInc(s.counter, counter);
    }
}
void CallTraceStorage::resetCounters() {
for (LongHashTable* table = _current_table; table != NULL; table = table->prev()) {
u64* keys = table->keys();
CallTraceSample* values = table->values();
u32 capacity = table->capacity();
for (u32 slot = 0; slot < capacity; slot++) {
if (keys[slot] != 0) {
CallTraceSample& s = values[slot];
storeRelease(s.samples, 0);
storeRelease(s.counter, 0);
}
}
}
}

76
src/callTraceStorage.h Normal file
View File

@@ -0,0 +1,76 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _CALLTRACESTORAGE_H
#define _CALLTRACESTORAGE_H
#include <map>
#include <vector>
#include "arch.h"
#include "linearAllocator.h"
#include "vmEntry.h"
class LongHashTable;
// A stored stack trace: the number of frames followed by the frames themselves.
// frames[] is declared with one element; CallTraceStorage::storeCallTrace() allocates
// the struct with room for num_frames entries.
struct CallTrace {
int num_frames;
ASGCT_CallFrame frames[1];
};
// Value stored in a hash table slot: the call trace plus its accumulated statistics.
struct CallTraceSample {
CallTrace* trace;
u64 samples;
u64 counter;
// Reads the trace pointer with acquire semantics (pairs with setTrace)
CallTrace* acquireTrace() {
return loadAcquire(trace);
}
// Publishes the trace pointer with release semantics
void setTrace(CallTrace* value) {
storeRelease(trace, value);
}
// Merges another sample into this one: the trace pointer is taken from `s`,
// samples and counter are summed
CallTraceSample& operator+=(const CallTraceSample& s) {
trace = s.trace;
samples += s.samples;
counter += s.counter;
return *this;
}
};
// Storage of unique call traces, backed by a chain of LongHashTable instances
// and a LinearAllocator that holds the frames.
class CallTraceStorage {
private:
// Synthetic trace reported under the overflow ID when traces had to be dropped
static CallTrace _overflow_trace;
LinearAllocator _allocator;
LongHashTable* _current_table;
// Bytes accounted to hash tables; allocator memory is added in usedMemory()
size_t _used_memory;
size_t _mem_limit;
// Number of put() calls that could not store their trace
u64 _overflow;
u64 calcHash(int num_frames, ASGCT_CallFrame* frames);
CallTrace* storeCallTrace(int num_frames, ASGCT_CallFrame* frames);
CallTrace* findCallTrace(LongHashTable* table, u64 hash);
public:
CallTraceStorage();
~CallTraceStorage();
// Discards all traces; mem_limit of 0 means no limit
void clear(size_t mem_limit);
u32 capacity();
size_t usedMemory();
u64 overflow() { return _overflow; }
void collectTraces(std::map<u32, CallTrace*>& map);
void collectSamples(std::vector<CallTraceSample*>& samples);
void collectSamples(std::map<u64, CallTraceSample>& map);
// Stores a trace (if new) and returns its ID; statistics are updated when counter != 0
u32 put(int num_frames, ASGCT_CallFrame* frames, u64 counter);
// Adds statistics to a trace identified by an ID returned from put()
void add(u32 call_trace_id, u64 samples, u64 counter);
void resetCounters();
};
#endif // _CALLTRACESTORAGE_H

32
src/chk.cpp Normal file
View File

@@ -0,0 +1,32 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef __clang__
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "asprof.h"
// libgcc references __sprintf_chk, which musl libc does not provide.
// A weak definition lets the profiler library work with both glibc and musl.
// Formats into `s` (at most `slen` bytes) and aborts the process if the output
// does not fit; `flag` is unused.
extern "C" WEAK DLLEXPORT
int __sprintf_chk(char* s, int flag, size_t slen, const char* format, ...) {
    va_list args;
    va_start(args, format);
    const int written = vsnprintf(s, slen, format, args);
    va_end(args);

    // A negative result (formatting error) counts as a failure too, exactly as the
    // implicit int -> size_t conversion of a direct comparison would treat it.
    const bool fits = written >= 0 && (size_t)written < slen;
    if (!fits) {
        fprintf(stderr, "__sprintf_chk failed\n");
        abort();
    }
    return written;
}
#endif // __clang__

View File

@@ -1,90 +1,136 @@
/*
* Copyright 2016 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "codeCache.h"
#include "dwarf.h"
#include "log.h"
#include "os.h"
char* NativeFunc::create(const char* name, short lib_index) {
NativeFunc* f = (NativeFunc*)malloc(sizeof(NativeFunc) + 1 + strlen(name));
f->_lib_index = lib_index;
f->_mark = 0;
return strcpy(f->_name, name);
}
// Frees a name previously returned by create().
// NOTE(review): from() is defined elsewhere; presumably it maps the name pointer back
// to the enclosing NativeFunc allocation — confirm.
void NativeFunc::destroy(char* name) {
free(from(name));
}
// Size in bytes of the allocation made by create() for this name:
// the NativeFunc header plus the name and its terminating NUL.
size_t NativeFunc::usedMemory(const char* name) {
return sizeof(NativeFunc) + 1 + strlen(from(name)->_name);
}
// Creates an empty code cache describing one library or code region.
// name        - display name; copied into a NativeFunc allocation
// lib_index   - index stored with the cache and given to symbols added later
// min_address / max_address - initial address bounds of the region
// image_base  - stored as is in _image_base
CodeCache::CodeCache(const char* name, short lib_index,
const void* min_address, const void* max_address,
const char* image_base) {
// The cache's own name is created with lib_index -1, unlike the symbols added to it
_name = NativeFunc::create(name, -1);
_lib_index = lib_index;
_min_address = min_address;
_max_address = max_address;
_text_base = NULL;
_image_base = image_base;
_plt_offset = 0;
_plt_size = 0;
// No imports are known until addImport() records them
memset(_imports, 0, sizeof(_imports));
_imports_patchable = false;
_debug_symbols = false;
_dwarf_table = NULL;
_dwarf_table_length = 0;
_capacity = INITIAL_CODE_CACHE_CAPACITY;
_count = 0;
_blobs = new CodeBlob[_capacity];
}
// Releases the names of all stored blobs, the cache's own name,
// the blob array and the DWARF table.
CodeCache::~CodeCache() {
    int remaining = _count;
    for (CodeBlob* blob = _blobs; remaining > 0; blob++, remaining--) {
        NativeFunc::destroy(blob->_name);
    }
    NativeFunc::destroy(_name);

    delete[] _blobs;
    free(_dwarf_table);
}
void CodeCache::expand() {
CodeBlob* old_blobs = _blobs;
CodeBlob* new_blobs = new CodeBlob[_capacity * 2];
memcpy(new_blobs, old_blobs, _capacity * sizeof(CodeBlob));
memcpy(new_blobs, old_blobs, _count * sizeof(CodeBlob));
_capacity *= 2;
_blobs = new_blobs;
delete[] old_blobs;
}
void CodeCache::add(const void* start, int length, jmethodID method) {
void CodeCache::add(const void* start, int length, const char* name, bool update_bounds) {
char* name_copy = NativeFunc::create(name, _lib_index);
// Replace non-printable characters
for (char* s = name_copy; *s != 0; s++) {
if (*s < ' ') *s = '?';
}
if (_count >= _capacity) {
expand();
}
const void* end = (const char*)start + length;
_blobs[_count]._start = start;
_blobs[_count]._end = (const char*)start + length;
_blobs[_count]._method = method;
_blobs[_count]._end = end;
_blobs[_count]._name = name_copy;
_count++;
}
void CodeCache::remove(const void* start, jmethodID method) {
for (int i = 0; i < _count; i++) {
if (_blobs[i]._start == start && _blobs[i]._method == method) {
_blobs[i]._method = NULL;
return;
}
if (update_bounds) {
updateBounds(start, end);
}
}
jmethodID CodeCache::find(const void* address) {
// Widens the cache's address range so that it includes [start, end).
// The bounds are only ever extended, never shrunk.
void CodeCache::updateBounds(const void* start, const void* end) {
if (start < _min_address) _min_address = start;
if (end > _max_address) _max_address = end;
}
void CodeCache::sort() {
if (_count == 0) return;
qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator);
if (_min_address == NO_MIN_ADDRESS) _min_address = _blobs[0]._start;
if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end;
}
CodeBlob* CodeCache::findBlob(const char* name) {
for (int i = 0; i < _count; i++) {
if (address >= _blobs[i]._start && address < _blobs[i]._end) {
return _blobs[i]._method;
const char* blob_name = _blobs[i]._name;
if (blob_name != NULL && strcmp(blob_name, name) == 0) {
return &_blobs[i];
}
}
return NULL;
}
NativeCodeCache::NativeCodeCache(const char* name, const void* min_address, const void* max_address) {
_name = strdup(name);
_min_address = min_address;
_max_address = max_address;
}
NativeCodeCache::~NativeCodeCache() {
CodeBlob* CodeCache::findBlobByAddress(const void* address) {
for (int i = 0; i < _count; i++) {
free(_blobs[i]._method);
if (address >= _blobs[i]._start && address < _blobs[i]._end) {
return &_blobs[i];
}
}
free(_name);
return NULL;
}
void NativeCodeCache::add(const void* start, int length, const char* name) {
CodeCache::add(start, length, (jmethodID)strdup(name));
}
void NativeCodeCache::sort() {
if (_count == 0) return;
qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator);
if (_min_address == NULL) _min_address = _blobs[0]._start;
if (_max_address == NULL) _max_address = _blobs[_count - 1]._end;
}
const char* NativeCodeCache::binarySearch(const void* address) {
const char* CodeCache::binarySearch(const void* address) {
int low = 0;
int high = _count - 1;
@@ -95,34 +141,187 @@ const char* NativeCodeCache::binarySearch(const void* address) {
} else if (_blobs[mid]._start > address) {
high = mid - 1;
} else {
return (const char*)_blobs[mid]._method;
return _blobs[mid]._name;
}
}
// Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code
if (low > 0 && _blobs[low - 1]._start == _blobs[low - 1]._end) {
return (const char*)_blobs[low - 1]._method;
// Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code.
// Also, in some cases (endless loop) the return address may point beyond the function.
if (low > 0 && (_blobs[low - 1]._start == _blobs[low - 1]._end || _blobs[low - 1]._end == address)) {
return _blobs[low - 1]._name;
}
return _name;
}
const void* NativeCodeCache::findSymbol(const char* name) {
for (int i = 0; i < _count; i++) {
const char* blob_name = (const char*)_blobs[i]._method;
if (blob_name != NULL && strcmp(blob_name, name) == 0) {
return _blobs[i]._start;
}
}
return NULL;
const void* CodeCache::findSymbol(const char* name) {
CodeBlob* blob = findBlob(name);
return blob == NULL ? NULL : blob->_start;
}
const void* NativeCodeCache::findSymbolByPrefix(const char* prefix) {
int prefix_len = strlen(prefix);
// Convenience overload: searches by a NUL-terminated prefix, using its full length.
const void* CodeCache::findSymbolByPrefix(const char* prefix) {
return findSymbolByPrefix(prefix, strlen(prefix));
}
const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) {
const void* result = NULL;
for (int i = 0; i < _count; i++) {
const char* blob_name = (const char*)_blobs[i]._method;
const char* blob_name = _blobs[i]._name;
if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) {
return _blobs[i]._start;
result = _blobs[i]._start;
// Symbols which contain a dot are only patched if no alternative is found,
// see #1247
if (strchr(blob_name + prefix_len, '.') == NULL) {
return result;
}
}
}
return NULL;
return result;
}
// Remembers an import table entry for the given import in the first free slot.
// The entry is dropped if all NUM_IMPORT_TYPES slots for this import are already taken.
void CodeCache::saveImport(ImportId id, void** entry) {
    int ty = 0;
    while (ty < NUM_IMPORT_TYPES && _imports[id][ty] != nullptr) {
        ty++;
    }
    if (ty < NUM_IMPORT_TYPES) {
        _imports[id][ty] = entry;
    }
}
void CodeCache::addImport(void** entry, const char* name) {
switch (name[0]) {
case 'a':
if (strcmp(name, "aligned_alloc") == 0) {
saveImport(im_aligned_alloc, entry);
}
break;
case 'c':
if (strcmp(name, "calloc") == 0) {
saveImport(im_calloc, entry);
}
break;
case 'd':
if (strcmp(name, "dlopen") == 0) {
saveImport(im_dlopen, entry);
}
break;
case 'f':
if (strcmp(name, "free") == 0) {
saveImport(im_free, entry);
}
break;
case 'm':
if (strcmp(name, "malloc") == 0) {
saveImport(im_malloc, entry);
}
break;
case 'p':
if (strcmp(name, "pthread_create") == 0) {
saveImport(im_pthread_create, entry);
} else if (strcmp(name, "pthread_exit") == 0) {
saveImport(im_pthread_exit, entry);
} else if (strcmp(name, "pthread_mutex_lock") == 0) {
saveImport(im_pthread_mutex_lock, entry);
} else if (strcmp(name, "pthread_rwlock_rdlock") == 0) {
saveImport(im_pthread_rwlock_rdlock, entry);
} else if (strcmp(name, "pthread_rwlock_wrlock") == 0) {
saveImport(im_pthread_rwlock_wrlock, entry);
} else if (strcmp(name, "pthread_setspecific") == 0) {
saveImport(im_pthread_setspecific, entry);
} else if (strcmp(name, "poll") == 0) {
saveImport(im_poll, entry);
} else if (strcmp(name, "posix_memalign") == 0) {
saveImport(im_posix_memalign, entry);
}
break;
case 'r':
if (strcmp(name, "realloc") == 0) {
saveImport(im_realloc, entry);
}
break;
}
}
// Returns the PRIMARY import entry recorded for the given id (NULL if none was saved).
// Tries to make the import table writable first; the outcome of that attempt
// does not affect the returned value.
void** CodeCache::findImport(ImportId id) {
    if (_imports_patchable == false) {
        makeImportsPatchable();
    }
    void** primary = _imports[id][PRIMARY];
    return primary;
}
// Overwrites every recorded import entry of the given id with hook_func.
// Does nothing when the import table cannot be made writable.
void CodeCache::patchImport(ImportId id, void* hook_func) {
    bool writable = _imports_patchable || makeImportsPatchable();
    if (!writable) {
        return;
    }

    void*** slots = _imports[id];
    for (int slot = 0; slot < NUM_IMPORT_TYPES; slot++) {
        if (slots[slot] == NULL) {
            continue;
        }
        *slots[slot] = hook_func;
    }
}
bool CodeCache::makeImportsPatchable() {
void** min_import = (void**)-1;
void** max_import = NULL;
for (int i = 0; i < NUM_IMPORTS; i++) {
for (int j = 0; j < NUM_IMPORT_TYPES; j++) {
void** entry = _imports[i][j];
if (entry == NULL) continue;
if (entry < min_import) min_import = entry;
if (entry > max_import) max_import = entry;
}
}
if (max_import != NULL) {
uintptr_t patch_start = (uintptr_t)min_import & ~OS::page_mask;
uintptr_t patch_end = (uintptr_t)max_import & ~OS::page_mask;
if (OS::mprotect((void*)patch_start, patch_end - patch_start + OS::page_size, PROT_READ | PROT_WRITE) != 0) {
Log::warn("Could not patch %s", name());
return false;
}
}
_imports_patchable = true;
return true;
}
// Stores the frame description table and its length; findFrameDesc() searches this table.
// The pointer is stored as is, no copy is made here.
void CodeCache::setDwarfTable(FrameDesc* table, int length) {
    _dwarf_table = table;
    _dwarf_table_length = length;
}
// Looks up the frame description for the given pc.
// The table is binary-searched by `loc`, so it is expected to be sorted by that field.
// Returns the entry with exactly matching loc, or otherwise the closest preceding entry.
// If there is no preceding entry, returns FrameDesc::empty_frame for addresses inside
// the PLT range and FrameDesc::default_frame for everything else.
FrameDesc* CodeCache::findFrameDesc(const void* pc) {
    // Offset of pc from the text base, narrowed to 32 bits
    u32 target_loc = (const char*)pc - _text_base;
    int low = 0;
    int high = _dwarf_table_length - 1;
    while (low <= high) {
        // Midpoint via unsigned shift
        int mid = (unsigned int)(low + high) >> 1;
        if (_dwarf_table[mid].loc < target_loc) {
            low = mid + 1;
        } else if (_dwarf_table[mid].loc > target_loc) {
            high = mid - 1;
        } else {
            return &_dwarf_table[mid];
        }
    }
    if (low > 0) {
        // No exact match: _dwarf_table[low - 1] is the last entry with loc below target_loc
        return &_dwarf_table[low - 1];
    } else if (target_loc - _plt_offset < _plt_size) {
        // Unsigned subtraction wraps for target_loc < _plt_offset, so this single
        // comparison checks _plt_offset <= target_loc < _plt_offset + _plt_size
        return &FrameDesc::empty_frame;
    } else {
        return &FrameDesc::default_frame;
    }
}
// Estimates the memory held by this CodeCache: the blob array (by capacity),
// the frame description table, the library name, every blob name,
// and the CodeCache object itself.
size_t CodeCache::usedMemory() {
    size_t names = NativeFunc::usedMemory(_name);
    for (int idx = 0; idx < _count; idx++) {
        names += NativeFunc::usedMemory(_blobs[idx]._name);
    }

    size_t tables = _capacity * sizeof(CodeBlob);
    tables += _dwarf_table_length * sizeof(FrameDesc);

    return tables + names + sizeof(CodeCache);
}

View File

@@ -1,33 +1,90 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _CODECACHE_H
#define _CODECACHE_H
#include <jvmti.h>
#include "arch.h"
// Sentinel bounds: the highest representable address stands for "no minimum yet"
// and the null address for "no maximum yet". They are the default
// min_address / max_address arguments of the CodeCache constructor.
#define NO_MIN_ADDRESS ((const void*)-1)
#define NO_MAX_ADDRESS ((const void*)0)
// Presumably the initial number of CodeBlob slots in a CodeCache - TODO confirm in the constructor
const int INITIAL_CODE_CACHE_CAPACITY = 1000;
// Size of the fixed CodeCacheArray::_libs array
const int MAX_NATIVE_LIBS = 2048;
// Identifiers of the imported functions that CodeCache::addImport() recognizes by name.
// Each value indexes the first dimension of CodeCache::_imports;
// NUM_IMPORTS is the number of identifiers, not an import itself.
enum ImportId {
    im_dlopen,
    im_pthread_create,
    im_pthread_exit,
    im_pthread_mutex_lock,
    im_pthread_rwlock_rdlock,
    im_pthread_rwlock_wrlock,
    im_pthread_setspecific,
    im_poll,
    im_malloc,
    im_calloc,
    im_realloc,
    im_free,
    im_posix_memalign,
    im_aligned_alloc,
    NUM_IMPORTS
};
// Slots available per ImportId (second dimension of CodeCache::_imports).
// saveImport() fills the first slot that is still empty, findImport() returns
// only the PRIMARY slot, and patchImport() rewrites every non-NULL slot.
enum ImportType {
    PRIMARY,
    SECONDARY,
    NUM_IMPORT_TYPES
};
// Values for the one-byte mark kept in a NativeFunc header
// (set through NativeFunc::mark() / CodeCache::mark()).
enum Mark {
    MARK_VM_RUNTIME = 1,
    MARK_INTERPRETER = 2,
    MARK_COMPILER_ENTRY = 3,
    MARK_ASYNC_PROFILER = 4, // async-profiler internals such as native hooks.
};
// Small header placed directly in front of a name string.
// Every accessor receives the name pointer and steps back sizeof(NativeFunc) bytes
// to reach the header, so it is only valid for names whose storage starts with
// such a header - presumably the ones returned by create(), which is defined elsewhere.
class NativeFunc {
  private:
    short _lib_index;
    char _mark;
    char _reserved;
    // Zero-length trailing array: the name characters follow the header in memory
    char _name[0];

    // Recovers the header from a pointer to its name
    static NativeFunc* from(const char* name) {
        return (NativeFunc*)(name - sizeof(NativeFunc));
    }

  public:
    // Defined outside this header
    static char* create(const char* name, short lib_index);
    static void destroy(char* name);
    static size_t usedMemory(const char* name);

    // Library index stored in the header of the given name
    static short libIndex(const char* name) {
        return from(name)->_lib_index;
    }

    // Current mark of the given name
    static char mark(const char* name) {
        return from(name)->_mark;
    }

    // Replaces the mark of the given name
    static void mark(const char* name, char value) {
        from(name)->_mark = value;
    }
};
class CodeBlob {
public:
const void* _start;
const void* _end;
jmethodID _method;
char* _name;
static int comparator(const void* c1, const void* c2) {
CodeBlob* cb1 = (CodeBlob*)c1;
@@ -45,55 +102,148 @@ class CodeBlob {
};
class FrameDesc;
class CodeCache {
protected:
private:
char* _name;
short _lib_index;
const void* _min_address;
const void* _max_address;
const char* _text_base;
const char* _image_base;
unsigned int _plt_offset;
unsigned int _plt_size;
void** _imports[NUM_IMPORTS][NUM_IMPORT_TYPES];
bool _imports_patchable;
bool _debug_symbols;
FrameDesc* _dwarf_table;
int _dwarf_table_length;
int _capacity;
int _count;
CodeBlob* _blobs;
void expand();
bool makeImportsPatchable();
void saveImport(ImportId id, void** entry);
public:
CodeCache() {
_capacity = INITIAL_CODE_CACHE_CAPACITY;
_count = 0;
_blobs = new CodeBlob[_capacity];
}
CodeCache(const char* name,
short lib_index = -1,
const void* min_address = NO_MIN_ADDRESS,
const void* max_address = NO_MAX_ADDRESS,
const char* image_base = NULL);
~CodeCache() {
delete[] _blobs;
}
~CodeCache();
void add(const void* start, int length, jmethodID method);
void remove(const void* start, jmethodID method);
jmethodID find(const void* address);
};
class NativeCodeCache : public CodeCache {
private:
char* _name;
const void* _min_address;
const void* _max_address;
public:
NativeCodeCache(const char* name, const void* min_address = NULL, const void* max_address = NULL);
~NativeCodeCache();
const char* name() {
const char* name() const {
return _name;
}
bool contains(const void* address) {
const void* minAddress() const {
return _min_address;
}
const void* maxAddress() const {
return _max_address;
}
const char* imageBase() const {
return _image_base;
}
bool contains(const void* address) const {
return address >= _min_address && address < _max_address;
}
void add(const void* start, int length, const char* name);
void setTextBase(const char* text_base) {
_text_base = text_base;
}
void setPlt(unsigned int plt_offset, unsigned int plt_size) {
_plt_offset = plt_offset;
_plt_size = plt_size;
}
bool hasDebugSymbols() const {
return _debug_symbols;
}
void setDebugSymbols(bool debug_symbols) {
_debug_symbols = debug_symbols;
}
void add(const void* start, int length, const char* name, bool update_bounds = false);
void updateBounds(const void* start, const void* end);
void sort();
template <typename NamePredicate>
inline void mark(NamePredicate predicate, char value) {
for (int i = 0; i < _count; i++) {
const char* blob_name = _blobs[i]._name;
if (blob_name != NULL && predicate(blob_name)) {
NativeFunc::mark(blob_name, value);
}
}
if (value == MARK_VM_RUNTIME && _name != NULL) {
// In case a library has no debug symbols
NativeFunc::mark(_name, value);
}
}
void addImport(void** entry, const char* name);
void** findImport(ImportId id);
void patchImport(ImportId id, void* hook_func);
CodeBlob* findBlob(const char* name);
CodeBlob* findBlobByAddress(const void* address);
const char* binarySearch(const void* address);
const void* findSymbol(const char* name);
const void* findSymbolByPrefix(const char* prefix);
const void* findSymbolByPrefix(const char* prefix, int prefix_len);
void setDwarfTable(FrameDesc* table, int length);
FrameDesc* findFrameDesc(const void* pc);
size_t usedMemory();
friend class UnloadProtection;
};
// Fixed-capacity, append-only list of CodeCache pointers.
// The element count is read with loadAcquire() and published with storeRelease()
// only after the new slot has been written, so a reader that observes the new
// count also finds the pointer in place.
class CodeCacheArray {
  private:
    CodeCache* _libs[MAX_NATIVE_LIBS];
    int _count;
    size_t _used_memory;

  public:
    // _used_memory must start at zero: add() accumulates into it and usedMemory()
    // returns it, so an uninitialized value would make the total meaningless
    // for any instance that is not zero-initialized by its storage class.
    CodeCacheArray() : _count(0), _used_memory(0) {
    }

    // No bounds check: index must be in [0, count())
    CodeCache* operator[](int index) {
        return _libs[index];
    }

    int count() {
        return loadAcquire(_count);
    }

    // Sum of usedMemory() of all libraries, sampled at the time each one was added
    size_t usedMemory() {
        return _used_memory;
    }

    // Appends a library. There is no capacity check here: the caller has to make
    // sure that count() is still below MAX_NATIVE_LIBS.
    void add(CodeCache* lib) {
        int index = loadAcquire(_count);
        _libs[index] = lib;
        _used_memory += lib->usedMemory();
        storeRelease(_count, index + 1);
    }
};
#endif // _CODECACHE_H

View File

@@ -0,0 +1,149 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.*;
import java.util.regex.Pattern;
public class Arguments {
public String title;
public String highlight;
public String output;
public String state;
public Pattern include;
public Pattern exclude;
public double minwidth;
public double grain;
public double tail = 0.1;
public int skip;
public boolean help;
public boolean reverse;
public boolean inverted;
public boolean diff;
public boolean cpu;
public boolean cpuTime;
public boolean wall;
public boolean alloc;
public boolean nativemem;
public boolean nativelock;
public boolean leak;
public boolean live;
public boolean lock;
public boolean trace;
public boolean threads;
public boolean classify;
public boolean total;
public boolean lines;
public boolean bci;
public boolean simple;
public boolean norm;
public boolean dot;
public long from;
public long to;
public long latency = -1;
public final List<String> files = new ArrayList<>();
public Arguments(String... args) {
for (int i = 0; i < args.length; i++) {
String arg = args[i];
String fieldName;
if (arg.startsWith("--")) {
fieldName = toCamelCase(arg.substring(2));
} else if (arg.startsWith("-") && arg.length() == 2) {
fieldName = alias(arg.charAt(1));
} else {
files.add(arg);
continue;
}
try {
Field f = Arguments.class.getDeclaredField(fieldName);
if ((f.getModifiers() & (Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL)) != 0) {
throw new IllegalArgumentException(arg);
}
Class<?> type = f.getType();
if (type == String.class) {
f.set(this, args[++i]);
} else if (type == boolean.class) {
f.setBoolean(this, true);
} else if (type == int.class) {
f.setInt(this, Integer.parseInt(args[++i]));
} else if (type == double.class) {
f.setDouble(this, parseRatio(args[++i]));
} else if (type == long.class) {
f.setLong(this, parseTimestamp(args[++i]));
} else if (type == Pattern.class) {
f.set(this, Pattern.compile(args[++i]));
}
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new IllegalArgumentException(arg);
}
}
}
private static String alias(char c) {
switch (c) {
case 'h':
return "help";
case 'o':
return "output";
case 'r':
return "reverse";
case 'i':
return "inverted";
case 'I':
return "include";
case 'X':
return "exclude";
case 't':
return "threads";
case 's':
return "state";
default:
return String.valueOf(c);
}
}
private static String toCamelCase(String name) {
for (int i; (i = name.lastIndexOf('-', name.length() - 2)) >= 0; ) {
name = name.substring(0, i) + Character.toUpperCase(name.charAt(i + 1)) + name.substring(i + 2);
}
return name;
}
// Absolute floating point value or percentage followed by %
private static double parseRatio(String value) {
if (value.endsWith("%")) {
return Double.parseDouble(value.substring(0, value.length() - 1)) / 100;
}
return Double.parseDouble(value);
}
// Milliseconds or HH:mm:ss.S or yyyy-MM-dd'T'HH:mm:ss.S
private static long parseTimestamp(String time) {
if (time.indexOf(':') < 0) {
return Long.parseLong(time);
}
GregorianCalendar cal = new GregorianCalendar();
StringTokenizer st = new StringTokenizer(time, "-:.T");
if (time.indexOf('T') > 0) {
cal.set(Calendar.YEAR, Integer.parseInt(st.nextToken()));
cal.set(Calendar.MONTH, Integer.parseInt(st.nextToken()) - 1);
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(st.nextToken()));
}
cal.set(Calendar.HOUR_OF_DAY, st.hasMoreTokens() ? Integer.parseInt(st.nextToken()) : 0);
cal.set(Calendar.MINUTE, st.hasMoreTokens() ? Integer.parseInt(st.nextToken()) : 0);
cal.set(Calendar.SECOND, st.hasMoreTokens() ? Integer.parseInt(st.nextToken()) : 0);
cal.set(Calendar.MILLISECOND, st.hasMoreTokens() ? Integer.parseInt(st.nextToken()) : 0);
return cal.getTimeInMillis();
}
}

View File

@@ -0,0 +1,39 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.ArrayList;
/**
 * An {@link Index} that additionally remembers keys by their position,
 * so that a key can be looked up by its index.
 */
public class BidirectionalIndex<T> extends Index<T> {
    private final ArrayList<T> reverseIndex;

    public BidirectionalIndex(Class<T> cls, T empty) {
        this(cls, empty, 256);
    }

    public BidirectionalIndex(Class<T> cls, T empty, int initialCapacity) {
        super(cls, empty, initialCapacity);
        ArrayList<T> byPosition = new ArrayList<>(initialCapacity);
        byPosition.add(empty);
        this.reverseIndex = byPosition;
    }

    /**
     * Returns the index of the key, registering the key in both directions if it is new.
     */
    @Override
    public int index(T key) {
        final int known = reverseIndex.size();
        assert super.size() == known;

        int idx = super.index(key);
        if (idx >= known) {
            // A new key always receives the next free position
            assert idx == known;
            reverseIndex.add(key);
        }
        return idx;
    }

    /**
     * Returns the key stored at the given index.
     */
    public T getKey(int idx) {
        return reverseIndex.get(idx);
    }
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.Arrays;
/**
 * Growable stack of frames, kept as two parallel arrays of names and types.
 * Only the first {@code size} elements of each array are meaningful.
 */
public class CallStack {
    String[] names = new String[16];
    byte[] types = new byte[16];
    int size;

    /** Appends a frame on top of the stack, doubling the arrays when they are full. */
    public void push(String name, byte type) {
        int slot = size;
        if (slot >= names.length) {
            int doubled = slot * 2;
            names = Arrays.copyOf(names, doubled);
            types = Arrays.copyOf(types, doubled);
        }
        names[slot] = name;
        types[slot] = type;
        size = slot + 1;
    }

    /** Drops the top frame; the array contents are left untouched. */
    public void pop() {
        size -= 1;
    }

    /** Empties the stack without shrinking the arrays. */
    public void clear() {
        size = 0;
    }
}

View File

@@ -0,0 +1,146 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import one.jfr.StackTrace;
import static one.convert.Frame.*;
/**
 * Assigns a coarse category (GC, JIT, class loading, interpreter, ...) to a stack trace
 * by looking at frame types and method names. Subclasses supply the method name lookup.
 */
abstract class Classifier {

    /** Category with the title and frame type under which it is reported. */
    enum Category {
        GC("[gc]", TYPE_CPP),
        JIT("[jit]", TYPE_CPP),
        VM("[vm]", TYPE_CPP),
        VTABLE_STUBS("[vtable_stubs]", TYPE_NATIVE),
        NATIVE("[native]", TYPE_NATIVE),
        INTERPRETER("[Interpreter]", TYPE_NATIVE),
        C1_COMP("[c1_comp]", TYPE_C1_COMPILED),
        C2_COMP("[c2_comp]", TYPE_INLINED),
        ADAPTER("[c2i_adapter]", TYPE_INLINED),
        CLASS_INIT("[class_init]", TYPE_CPP),
        CLASS_LOAD("[class_load]", TYPE_CPP),
        CLASS_RESOLVE("[class_resolve]", TYPE_CPP),
        CLASS_VERIFY("[class_verify]", TYPE_CPP),
        LAMBDA_INIT("[lambda_init]", TYPE_CPP);

        final String title;
        final byte type;

        Category(String title, byte type) {
            this.title = title;
            this.type = type;
        }
    }

    /**
     * Classifies a stack trace. The detectors run in a fixed order:
     * GC/JIT threads first, then class loading related activity, then everything else.
     */
    public Category getCategory(StackTrace stackTrace) {
        long[] methods = stackTrace.methods;
        byte[] types = stackTrace.types;

        Category found = detectGcJit(methods, types);
        if (found == null) {
            found = detectClassLoading(methods, types);
        }
        if (found == null) {
            found = detectOther(methods, types);
        }
        return found;
    }

    // Scans from the last frame backwards over the run of native/C++ frames
    // looking for well-known thread entry points
    private Category detectGcJit(long[] methods, byte[] types) {
        boolean sawThreadEntry = false;
        for (int i = types.length - 1; i >= 0; i--) {
            byte type = types[i];
            if (type == TYPE_NATIVE) {
                continue;
            }
            if (type != TYPE_CPP) {
                break;
            }

            String name = getMethodName(methods[i], type);
            if (name.equals("CompileBroker::compiler_thread_loop")) {
                return Category.JIT;
            }
            if (name.equals("GCTaskThread::run") || name.equals("WorkerThread::run")) {
                return Category.GC;
            }
            if (name.equals("java_start") || name.equals("thread_native_entry")) {
                sawThreadEntry = true;
            }
        }
        return sawThreadEntry ? Category.VM : null;
    }

    // Scans frames starting from index 0; the first frame that matches any rule decides
    private Category detectClassLoading(long[] methods, byte[] types) {
        for (int i = 0; i < methods.length; i++) {
            String name = getMethodName(methods[i], types[i]);

            if (name.equals("Verifier::verify")) {
                return Category.CLASS_VERIFY;
            }
            if (name.startsWith("InstanceKlass::initialize")) {
                return Category.CLASS_INIT;
            }
            if (name.startsWith("LinkResolver::")
                    || name.startsWith("InterpreterRuntime::resolve")
                    || name.startsWith("SystemDictionary::resolve")) {
                return Category.CLASS_RESOLVE;
            }
            if (name.endsWith("ClassLoader.loadClass")) {
                return Category.CLASS_LOAD;
            }
            if (name.endsWith("LambdaMetafactory.metafactory")
                    || name.endsWith("LambdaMetafactory.altMetafactory")) {
                return Category.LAMBDA_INIT;
            }
            if (name.endsWith("table stub")) {
                return Category.VTABLE_STUBS;
            }
            if (name.equals("Interpreter")) {
                return Category.INTERPRETER;
            }
            if (name.startsWith("I2C/C2I")) {
                boolean nextIsInterpreted = i + 1 < types.length && types[i + 1] == TYPE_INTERPRETED;
                return nextIsInterpreted ? Category.INTERPRETER : Category.ADAPTER;
            }
        }
        return null;
    }

    // Fallback: decides by the type of the first Java frame and by whether
    // ordinary native frames were seen before it
    private Category detectOther(long[] methods, byte[] types) {
        boolean inJava = true;
        for (int i = 0; i < types.length; i++) {
            byte type = types[i];

            if (type == TYPE_INTERPRETED) {
                return inJava ? Category.INTERPRETER : Category.NATIVE;
            }
            if (type == TYPE_JIT_COMPILED) {
                return inJava ? Category.C2_COMP : Category.NATIVE;
            }
            if (type == TYPE_C1_COMPILED) {
                return inJava ? Category.C1_COMP : Category.NATIVE;
            }

            if (type == TYPE_INLINED) {
                inJava = true;
            } else if (type == TYPE_NATIVE) {
                String name = getMethodName(methods[i], type);
                if (isVmEntry(name)) {
                    return Category.VM;
                }
                if (!keepsJavaContext(name)) {
                    inJava = false;
                }
            } else if (type == TYPE_CPP) {
                if (getMethodName(methods[i], type).startsWith("Runtime1::")) {
                    return Category.C1_COMP;
                }
            }
        }
        return Category.NATIVE;
    }

    // Native frames that are attributed to the VM
    private static boolean isVmEntry(String name) {
        if (name.startsWith("JVM_") || name.startsWith("Unsafe_")
                || name.startsWith("MHN_") || name.startsWith("jni_")) {
            return true;
        }
        return name.equals("call_stub")
                || name.equals("deoptimization")
                || name.equals("unknown_Java")
                || name.equals("not_walkable_Java")
                || name.equals("InlineCacheBuffer");
    }

    // Native frames that do not switch the classification out of Java code
    private static boolean keepsJavaContext(String name) {
        return name.endsWith("_arraycopy") || name.contains("pthread_cond");
    }

    protected abstract String getMethodName(long method, byte type);
}

View File

@@ -0,0 +1,478 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Comparator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import static one.convert.Frame.*;
import static one.convert.ResourceProcessor.*;
/**
 * In-memory flame graph: a tree of {@link Frame}s built from collapsed stacks or from
 * a previously generated HTML flame graph, which can be written back as HTML
 * (template {@code /flame.html}) or as collapsed stacks.
 * <p>
 * The instance also serves as the {@link Comparator} that orders sibling frames on output.
 */
public class FlameGraph implements Comparator<Frame> {
    private static final Frame[] EMPTY_FRAME_ARRAY = {};
    // Suffix appended to a frame name in collapsed output, indexed by frame type
    private static final String[] FRAME_SUFFIX = {"_[0]", "_[j]", "_[i]", "", "", "_[k]", "_[1]"};
    // Flag returned by detectType() together with the type when the name carries a 4-character suffix
    private static final byte HAS_SUFFIX = (byte) 0x80;
    // outbuf is written out once it grows beyond this many characters
    private static final int FLUSH_THRESHOLD = 15000;
    // Marks a frame that has no counterpart in the base flame graph
    private static final long NEW_FRAME_DIFF = Long.MIN_VALUE;
    private static final Pattern TID_FRAME_PATTERN = Pattern.compile("\\[(.* )?tid=\\d+]");

    private final Arguments args;
    // Constant pool of frame titles; index 0 is the empty string
    private final Index<String> cpool = new Index<>(String.class, "");
    private final Frame root = new Frame(0, TYPE_NATIVE);
    private final StringBuilder outbuf = new StringBuilder(FLUSH_THRESHOLD + 1000);
    private String title = "Flame Graph";

    // Maps a cpool index to the position of its title in sorted order; filled by printCpool()
    private int[] order;
    // Maps this graph's cpool indices to the base graph's indices (-1 if absent); filled by diff()
    private int[] cpoolMap;
    private int depth;
    // State of the previously printed frame, used by printFrame() for delta encoding
    private int lastLevel;
    private long lastX;
    private long lastTotal;
    private long lastDiff;
    private long mintotal;
    // Stays -1 until diff() has been called
    private long maxdiff = -1;

    public FlameGraph(Arguments args) {
        this.args = args;
    }

    /**
     * Reads collapsed stacks: each line is {@code frame;frame;...;frame ticks}.
     * Lines without a space, or starting with the only space, are skipped.
     * The reader is closed when parsing finishes.
     */
    public void parseCollapsed(Reader in) throws IOException {
        CallStack stack = new CallStack();

        try (BufferedReader br = new BufferedReader(in)) {
            for (String line; (line = br.readLine()) != null; ) {
                int space = line.lastIndexOf(' ');
                if (space <= 0) continue;

                long ticks = Long.parseLong(line.substring(space + 1));

                for (int from = 0, to; from < space; from = to + 1) {
                    if ((to = line.indexOf(';', from)) < 0) to = space;
                    String name = line.substring(from, to);
                    byte type = detectType(name);
                    if ((type & HAS_SUFFIX) != 0) {
                        // Strip the 4-character type suffix and clear the flag bit
                        name = name.substring(0, name.length() - 4);
                        type ^= HAS_SUFFIX;
                    }
                    stack.push(name, type);
                }

                addSample(stack, ticks);
                stack.clear();
            }
        }
    }

    /**
     * Reads back an HTML flame graph in the format produced by {@link #dump(PrintStream)}:
     * first the prefix-compressed constant pool, then the {@code f(...)}, {@code u(...)}
     * and {@code n(...)} frame records.
     * <p>
     * NOTE(review): results of readLine() are dereferenced without a null check,
     * so a truncated input ends in a NullPointerException.
     */
    public void parseHtml(Reader in) throws IOException {
        Frame[] levels = new Frame[128];
        int level = 0;
        long total = 0;
        // With reverse/include/exclude the tree cannot be reused as is:
        // it is read into a temporary tree and replayed through addSample()
        boolean needRebuild = args.reverse || args.include != null || args.exclude != null;

        try (BufferedReader br = new BufferedReader(in)) {
            for (String line; !(line = br.readLine()).startsWith("const cpool"); ) {
                if (line.startsWith("<h1")) {
                    title = line.substring(line.indexOf('>') + 1, line.lastIndexOf("</h1>"));
                }
            }
            // Skip the first cpool entry ('all'), which corresponds to index 0
            br.readLine();

            String s = "";
            for (String line; (line = br.readLine()).startsWith("'"); ) {
                String packed = unescape(line.substring(1, line.lastIndexOf('\'')));
                // First character encodes the length of the prefix shared with the previous string
                s = s.substring(0, packed.charAt(0) - ' ').concat(packed.substring(1));
                cpool.put(s, cpool.size());
            }

            // Skip everything up to the next empty line
            while (!br.readLine().isEmpty()) ;

            for (String line; !(line = br.readLine()).isEmpty(); ) {
                if (line.startsWith("d=")) continue;  // artifact of a differential flame graph

                // Drop the leading "x(" and the trailing ")"
                StringTokenizer st = new StringTokenizer(line.substring(2, line.length() - 1), ",");
                int nameAndType = Integer.parseInt(st.nextToken());

                char func = line.charAt(0);
                if (func == 'f') {
                    level = Integer.parseInt(st.nextToken());
                    // The x offset is not needed to rebuild the tree
                    st.nextToken();
                } else if (func == 'u') {
                    level++;
                } else if (func != 'n') {
                    throw new IllegalStateException("Unexpected line: " + line);
                }

                // An omitted total means "same as the previous frame"
                if (st.hasMoreTokens()) {
                    total = Long.parseLong(st.nextToken());
                }

                int titleIndex = nameAndType >>> 3;
                byte type = (byte) (nameAndType & 7);
                // Interpreted, inlined and C1 frames are stored under the JIT-compiled key
                byte normalizedType = type <= TYPE_INLINED || type >= TYPE_C1_COMPILED ? TYPE_JIT_COMPILED : type;
                Frame f = level > 0 || needRebuild ? new Frame(titleIndex, normalizedType) : root;
                fillFrameCounters(f, type, total);
                if (st.hasMoreTokens()) f.inlined = Long.parseLong(st.nextToken());
                if (st.hasMoreTokens()) f.c1 = Long.parseLong(st.nextToken());
                if (st.hasMoreTokens()) f.interpreted = Long.parseLong(st.nextToken());

                if (level > 0) {
                    Frame parent = levels[level - 1];
                    parent.put(f.key, f);
                    // self starts equal to total and is reduced by each child's total
                    parent.self -= total;
                    depth = Math.max(depth, level);
                }
                if (level >= levels.length) {
                    levels = Arrays.copyOf(levels, level * 2);
                }
                levels[level] = f;
            }
        }

        if (needRebuild) {
            rebuild(levels[0], new CallStack(), cpool.keys());
        }
    }

    // Replays a parsed tree as samples: every frame with self > 0 becomes one addSample() call
    private void rebuild(Frame frame, CallStack stack, String[] strings) {
        if (frame.self > 0) {
            addSample(stack, frame.self);
        }
        if (!frame.isEmpty()) {
            for (Frame child : frame.values()) {
                stack.push(strings[child.getTitleIndex()], child.getType());
                rebuild(child, stack, strings);
                stack.pop();
            }
        }
    }

    /**
     * Adds one stack with the given number of ticks to the tree, unless the stack
     * is filtered out by the include/exclude patterns. Honors {@code args.skip}
     * and {@code args.reverse}.
     */
    public void addSample(CallStack stack, long ticks) {
        if (excludeStack(stack)) {
            return;
        }

        Frame frame = root;
        if (args.reverse) {
            // Retain by-thread grouping, unless thread frame is skipped
            int skip = args.skip;
            if (skip == 0 && stack.size > 0 && isThreadFrame(stack.names[0], stack.types[0])) {
                frame = addChild(frame, stack.names[0], stack.types[0], ticks);
                skip = 1;
            }
            for (int i = stack.size; --i >= skip; ) {
                frame = addChild(frame, stack.names[i], stack.types[i], ticks);
            }
        } else {
            for (int i = args.skip; i < stack.size; i++) {
                frame = addChild(frame, stack.names[i], stack.types[i], ticks);
            }
        }
        // addChild() has already added ticks to the total of every ancestor
        frame.total += ticks;
        frame.self += ticks;

        depth = Math.max(depth, stack.size);
    }

    /**
     * Computes per-frame differences of self time against the given base flame graph.
     */
    public void diff(FlameGraph base) {
        // Build a map that translates this cpool keys to the base flamegraph's cpool keys
        cpoolMap = Arrays.stream(cpool.keys()).mapToInt(title -> base.cpool.getOrDefault(title, -1)).toArray();
        diff(base.root, root);
    }

    private void diff(Frame base, Frame current) {
        current.diff = base == null ? NEW_FRAME_DIFF : current.self - base.self;
        // Math.abs(Long.MIN_VALUE) is still negative, so NEW_FRAME_DIFF never raises maxdiff
        maxdiff = Math.max(maxdiff, Math.abs(current.diff));
        for (Frame child : current.values()) {
            Frame baseChild = base == null ? null : base.get(translateKey(child.key));
            diff(baseChild, child);
        }
    }

    // Replaces the title index in a frame key with the base graph's index, keeping the type bits
    private int translateKey(int key) {
        return cpoolMap[key & TITLE_MASK] | (key & ~TITLE_MASK);
    }

    /**
     * Writes the flame graph as UTF-8 text. The given stream is closed afterwards.
     */
    public void dump(OutputStream out) throws IOException {
        try (PrintStream ps = new PrintStream(out, false, "UTF-8")) {
            dump(ps);
        }
    }

    /**
     * Writes the flame graph either as collapsed stacks (when {@code args.output} is
     * "collapsed") or as HTML by filling the placeholders of the {@code /flame.html} template.
     */
    public void dump(PrintStream out) {
        // Frames with a total below this value are not printed
        mintotal = (long) (root.total * args.minwidth / 100);

        if ("collapsed".equals(args.output)) {
            printFrameCollapsed(out, root, cpool.keys());
            return;
        }

        String tail = getResource("/flame.html");

        tail = printTill(out, tail, "/*height:*/300");
        int depth = mintotal > 1 ? root.depth(mintotal) : this.depth + 1;
        out.print(Math.min(depth * 16, 32767));

        tail = printTill(out, tail, "/*title:*/");
        out.print(args.title != null ? args.title : title);

        // inverted toggles the layout for reversed stacktraces from icicle to flamegraph
        // and for default stacktraces from flamegraphs to icicle.
        tail = printTill(out, tail, "/*inverted:*/false");
        out.print(args.reverse ^ args.inverted);

        tail = printTill(out, tail, "/*maxdiff:*/-1");
        out.print(maxdiff);

        tail = printTill(out, tail, "/*depth:*/0");
        out.print(depth);

        tail = printTill(out, tail, "/*cpool:*/");
        printCpool(out);

        tail = printTill(out, tail, "/*frames:*/");
        printFrame(out, root, 0, 0);
        // Whatever is left in the buffer after the last flush
        out.print(outbuf);

        tail = printTill(out, tail, "/*highlight:*/");
        out.print(args.highlight != null ? "'" + escape(args.highlight) + "'" : "");

        out.print(tail);
    }

    // Prints the titles in sorted order with prefix compression: each entry starts with
    // a character encoding the length of the prefix shared with the previous entry
    // (capped at 95), followed by the rest of the string. Also fills order[].
    private void printCpool(PrintStream out) {
        String[] strings = cpool.keys();
        Arrays.sort(strings);
        out.print("'all'");

        order = new int[strings.length];
        String s = "";
        for (int i = 1; i < strings.length; i++) {
            // The first argument is evaluated before s is reassigned: previous vs. current string
            int prefixLen = Math.min(getCommonPrefix(s, s = strings[i]), 95);
            out.print(",\n'" + escape((char) (prefixLen + ' ') + s.substring(prefixLen)) + "'");
            order[cpool.get(s)] = i;
        }

        // cpool is not used beyond this point
        cpool.clear();
    }

    // Prints a frame and, recursively, its children that reach mintotal.
    // Records are delta-encoded against the previously printed frame:
    //   u(...) - one level deeper at the same x
    //   n(...) - same level, immediately to the right of the previous frame
    //   f(...) - anything else, with explicit level and x offset
    // The total is omitted when it equals the previous frame's total.
    private void printFrame(PrintStream out, Frame frame, int level, long x) {
        StringBuilder sb = outbuf;
        if (frame.diff != lastDiff) {
            if (frame.diff == NEW_FRAME_DIFF) {
                sb.append("d=U\n");
            } else {
                sb.append("d=").append(frame.diff).append('\n');
            }
        }

        // Sorted title position in the upper bits, frame type in the lower 3 bits
        int nameAndType = order[frame.getTitleIndex()] << 3 | frame.getType();
        boolean hasExtraTypes = (frame.inlined | frame.c1 | frame.interpreted) != 0 &&
                frame.inlined < frame.total && frame.interpreted < frame.total;

        char func = 'f';
        if (level == lastLevel + 1 && x == lastX) {
            func = 'u';
        } else if (level == lastLevel && x == lastX + lastTotal) {
            func = 'n';
        }

        sb.append(func).append('(').append(nameAndType);
        if (func == 'f') {
            sb.append(',').append(level).append(',').append(x - lastX);
        }
        if (frame.total != lastTotal || hasExtraTypes) {
            sb.append(',').append(frame.total);
            if (hasExtraTypes) {
                sb.append(',').append(frame.inlined).append(',').append(frame.c1).append(',').append(frame.interpreted);
            }
        }
        sb.append(")\n");

        if (sb.length() > FLUSH_THRESHOLD) {
            out.print(sb);
            sb.setLength(0);
        }

        lastLevel = level;
        lastX = x;
        lastTotal = frame.total;
        lastDiff = frame.diff;

        Frame[] children = frame.values().toArray(EMPTY_FRAME_ARRAY);
        Arrays.sort(children, this);

        // Children are laid out to the right of the frame's own self time
        x += frame.self;
        for (Frame child : children) {
            if (child.total >= mintotal) {
                printFrame(out, child, level + 1, x);
            }
            // Skipped children still occupy their width
            x += child.total;
        }
    }

    // Prints one collapsed line for every frame with self > 0. outbuf holds the
    // semicolon-separated path of the current frame and is restored before returning.
    private void printFrameCollapsed(PrintStream out, Frame frame, String[] strings) {
        StringBuilder sb = outbuf;
        int prevLength = sb.length();

        if (frame != root) {
            sb.append(strings[frame.getTitleIndex()]).append(FRAME_SUFFIX[frame.getType()]);
            if (frame.self > 0) {
                int tmpLength = sb.length();
                if (maxdiff >= 0) {
                    // Differential mode: print the base value before the current one
                    sb.append(' ').append(frame.diff == NEW_FRAME_DIFF ? 0 : frame.self - frame.diff);
                }
                out.print(sb.append(' ').append(frame.self).append('\n'));
                sb.setLength(tmpLength);
            }
            sb.append(';');
        }

        if (!frame.isEmpty()) {
            for (Frame child : frame.values()) {
                if (child.total >= mintotal) {
                    printFrameCollapsed(out, child, strings);
                }
            }
        }

        sb.setLength(prevLength);
    }

    // Returns true if the stack must be dropped: some frame matches the exclude pattern,
    // or an include pattern is given and no frame matches it
    private boolean excludeStack(CallStack stack) {
        Pattern include = args.include;
        Pattern exclude = args.exclude;
        if (include == null && exclude == null) {
            return false;
        }

        for (int i = 0; i < stack.size; i++) {
            if (exclude != null && exclude.matcher(stack.names[i]).matches()) {
                return true;
            }
            if (include != null && include.matcher(stack.names[i]).matches()) {
                if (exclude == null) return false;
                // Include is satisfied; keep scanning only for excluded frames
                include = null;
            }
        }

        return include != null;
    }

    // Initializes counters of a frame read from HTML: self and total start equal,
    // and the counter matching the frame type receives all ticks
    private static void fillFrameCounters(Frame frame, byte type, long ticks) {
        frame.self = frame.total = ticks;
        switch (type) {
            case TYPE_INTERPRETED:
                frame.interpreted = ticks;
                break;
            case TYPE_INLINED:
                frame.inlined = ticks;
                break;
            case TYPE_C1_COMPILED:
                frame.c1 = ticks;
                break;
        }
    }

    // Adds ticks to the total of `frame` and returns its child for the given title.
    // Interpreted, inlined and C1-compiled frames share the child keyed as JIT-compiled
    // and are told apart by the per-type counters.
    private Frame addChild(Frame frame, String title, byte type, long ticks) {
        frame.total += ticks;

        int titleIndex = cpool.index(title);

        Frame child;
        switch (type) {
            case TYPE_INTERPRETED:
                (child = frame.getChild(titleIndex, TYPE_JIT_COMPILED)).interpreted += ticks;
                break;
            case TYPE_INLINED:
                (child = frame.getChild(titleIndex, TYPE_JIT_COMPILED)).inlined += ticks;
                break;
            case TYPE_C1_COMPILED:
                (child = frame.getChild(titleIndex, TYPE_JIT_COMPILED)).c1 += ticks;
                break;
            default:
                child = frame.getChild(titleIndex, type);
        }
        return child;
    }

    // Determines the frame type from a collapsed frame name: by explicit suffix
    // (result has HAS_SUFFIX set) or otherwise by the shape of the name
    private static byte detectType(String title) {
        if (title.endsWith("_[j]")) {
            return TYPE_JIT_COMPILED | HAS_SUFFIX;
        } else if (title.endsWith("_[i]")) {
            return TYPE_INLINED | HAS_SUFFIX;
        } else if (title.endsWith("_[k]")) {
            return TYPE_KERNEL | HAS_SUFFIX;
        } else if (title.endsWith("_[0]")) {
            return TYPE_INTERPRETED | HAS_SUFFIX;
        } else if (title.endsWith("_[1]")) {
            return TYPE_C1_COMPILED | HAS_SUFFIX;
        } else if (title.contains("::") || title.startsWith("-[") || title.startsWith("+[")) {
            return TYPE_CPP;
        } else if (title.indexOf('/') > 0 && title.charAt(0) != '['
                || title.indexOf('.') > 0 && Character.isUpperCase(title.charAt(0))) {
            return TYPE_JIT_COMPILED;
        } else {
            return TYPE_NATIVE;
        }
    }

    private static boolean isThreadFrame(String name, byte type) {
        return type == TYPE_NATIVE && name.startsWith("[") && TID_FRAME_PATTERN.matcher(name).matches();
    }

    // Length of the common prefix of a and b; the prefix also ends at the first
    // character above 127
    private static int getCommonPrefix(String a, String b) {
        int length = Math.min(a.length(), b.length());
        for (int i = 0; i < length; i++) {
            if (a.charAt(i) != b.charAt(i) || a.charAt(i) > 127) {
                return i;
            }
        }
        return length;
    }

    // Escapes backslashes and single quotes for use inside a single-quoted string
    private static String escape(String s) {
        if (s.indexOf('\\') >= 0) s = s.replace("\\", "\\\\");
        if (s.indexOf('\'') >= 0) s = s.replace("'", "\\'");
        return s;
    }

    // Reverses escape(): the replacements are applied in the opposite order
    private static String unescape(String s) {
        if (s.indexOf('\'') >= 0) s = s.replace("\\'", "'");
        if (s.indexOf('\\') >= 0) s = s.replace("\\\\", "\\");
        return s;
    }

    /**
     * Orders frames by the position of their title in the sorted constant pool.
     * Relies on {@code order}, which is filled while the constant pool is printed.
     */
    @Override
    public int compare(Frame f1, Frame f2) {
        return order[f1.getTitleIndex()] - order[f2.getTitleIndex()];
    }

    /**
     * Reads a flame graph from a file, treated as HTML if the name ends with ".html"
     * and as collapsed stacks otherwise. The file is read as UTF-8.
     */
    public static FlameGraph parse(String input, Arguments args) throws IOException {
        FlameGraph fg = new FlameGraph(args);
        try (InputStreamReader in = new InputStreamReader(new FileInputStream(input), StandardCharsets.UTF_8)) {
            if (input.endsWith(".html")) {
                fg.parseHtml(in);
            } else {
                fg.parseCollapsed(in);
            }
        }
        return fg;
    }

    /**
     * Parses the input file and writes the resulting flame graph to the output file in UTF-8.
     */
    public static void convert(String input, String output, Arguments args) throws IOException {
        FlameGraph fg = parse(input, args);
        try (PrintStream out = new PrintStream(output, "UTF-8")) {
            fg.dump(out);
        }
    }
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.HashMap;
/**
 * Node of a flame graph tree. The node itself is the map of its children,
 * keyed by the packed (type, title index) value that is also stored in {@link #key}.
 */
public class Frame extends HashMap<Integer, Frame> {
    public static final byte TYPE_INTERPRETED = 0;
    public static final byte TYPE_JIT_COMPILED = 1;
    public static final byte TYPE_INLINED = 2;
    public static final byte TYPE_NATIVE = 3;
    public static final byte TYPE_CPP = 4;
    public static final byte TYPE_KERNEL = 5;
    public static final byte TYPE_C1_COMPILED = 6;

    // Key layout: frame type above bit TYPE_SHIFT, title index below it
    static final int TYPE_SHIFT = 28;
    static final int TITLE_MASK = (1 << TYPE_SHIFT) - 1;

    final int key;
    long total;
    long self;
    long diff;
    long inlined, c1, interpreted;

    private Frame(int key) {
        this.key = key;
    }

    Frame(int titleIndex, byte type) {
        this(packKey(titleIndex, type));
    }

    // Combines a title index and a frame type into a single key
    private static int packKey(int titleIndex, byte type) {
        return (type << TYPE_SHIFT) | titleIndex;
    }

    /** Returns the child with the given title and type, creating it on first use. */
    Frame getChild(int titleIndex, byte type) {
        return super.computeIfAbsent(packKey(titleIndex, type), childKey -> new Frame(childKey));
    }

    int getTitleIndex() {
        return key & TITLE_MASK;
    }

    /**
     * Returns the type to display: inlined, C1 or interpreted when the matching
     * counter reaches its share of the total, otherwise the type stored in the key.
     */
    byte getType() {
        if (inlined * 3 >= total) {
            return TYPE_INLINED;
        }
        if (c1 * 2 >= total) {
            return TYPE_C1_COMPILED;
        }
        if (interpreted * 2 >= total) {
            return TYPE_INTERPRETED;
        }
        return (byte) (key >>> TYPE_SHIFT);
    }

    /** Height of the subtree, counting only children whose total reaches the cutoff. */
    int depth(long cutoff) {
        int deepest = 0;
        for (Frame child : values()) {
            if (child.total < cutoff) {
                continue;
            }
            int childDepth = child.depth(cutoff);
            if (childDepth > deepest) {
                deepest = childDepth;
            }
        }
        return deepest + 1;
    }
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.lang.reflect.Array;
import java.util.HashMap;
/**
 * Map from values to the position at which each value was first seen.
 * <p>
 * Looking up the position of a value is a hash lookup, and {@link #keys()}
 * returns all values seen so far ordered by that position.
 * <p>
 * Position 0 is always occupied by the "empty" value passed to the constructor.
 *
 * @param <T> type of the values held in the container.
 */
public class Index<T> extends HashMap<T, Integer> {
    private final Class<T> cls;

    public Index(Class<T> cls, T empty) {
        this(cls, empty, 256);
    }

    public Index(Class<T> cls, T empty, int initialCapacity) {
        super(initialCapacity);
        this.cls = cls;
        super.put(empty, 0);
    }

    /**
     * Returns the position of {@code key}, registering it at the next free
     * position (the current size) if it has not been seen before.
     */
    public int index(T key) {
        Integer known = super.get(key);
        if (known == null) {
            int assigned = super.size();
            super.put(key, assigned);
            return assigned;
        }
        return known;
    }

    /**
     * Returns all registered values in an array where each value sits at its own position.
     */
    @SuppressWarnings("unchecked")
    public T[] keys() {
        T[] ordered = (T[]) Array.newInstance(cls, size());
        for (Entry<T, Integer> slot : entrySet()) {
            int position = slot.getValue();
            ordered[position] = slot.getKey();
        }
        return ordered;
    }
}

View File

@@ -0,0 +1,370 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import one.jfr.*;
import one.jfr.event.*;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.Map;
import java.util.regex.Pattern;
import static one.convert.Frame.*;
public abstract class JfrConverter extends Classifier {
protected final JfrReader jfr;
protected final Arguments args;
protected final EventCollector collector;
protected Dictionary<String> methodNames;
/**
 * Stores the reader and options, then builds the event collector.
 * {@code jfr} and {@code args} are assigned before {@link #createCollector}
 * runs because that method is overridable.
 * When both {@code nativemem} and {@code leak} are set, the collector is
 * wrapped in a {@code MallocLeakAggregator}.
 */
public JfrConverter(JfrReader jfr, Arguments args) {
    this.jfr = jfr;
    this.args = args;

    EventCollector base = createCollector(args);
    if (args.nativemem && args.leak) {
        this.collector = new MallocLeakAggregator(base, args.tail);
    } else {
        this.collector = base;
    }
}
/**
 * Processes the recording chunk by chunk: collects the events of each chunk
 * and hands them to {@link #convertChunk()}. If the collector reports pending
 * data from {@code finish()}, {@code convertChunk()} runs one more time.
 *
 * @throws IOException if reading the recording fails
 */
public void convert() throws IOException {
    TimeIntervals latencyWindows = readLatencyTimeIntervals();

    jfr.stopAtNewChunk = true;
    while (jfr.hasMoreChunks()) {
        // Method IDs may differ between chunks, so start with a fresh name cache
        methodNames = new Dictionary<>();

        collector.beforeChunk();
        collectEvents(latencyWindows);
        collector.afterChunk();

        convertChunk();
    }

    boolean hasPendingData = collector.finish();
    if (hasPendingData) {
        convertChunk();
    }
}
/**
 * Scans all chunks for {@code MethodTrace} events lasting at least
 * {@code args.latency} milliseconds and records their time spans.
 * The reader is rewound afterwards.
 *
 * @return the collected intervals, or {@code null} when {@code args.latency} is negative
 * @throws IOException      if reading the recording fails
 * @throws RuntimeException if the recording contains no MethodTrace events
 */
protected final TimeIntervals readLatencyTimeIntervals() throws IOException {
    if (args.latency < 0) {
        return null;
    }

    TimeIntervals.Builder builder = new TimeIntervals.Builder();
    boolean sawMethodTrace = false;

    jfr.stopAtNewChunk = true;
    while (jfr.hasMoreChunks()) {
        long minLatencyTicks = args.latency * jfr.ticksPerSec / 1000;
        for (MethodTrace trace; (trace = jfr.readEvent(MethodTrace.class)) != null; ) {
            sawMethodTrace = true;
            if (trace.duration < minLatencyTicks) {
                continue;
            }
            builder.add(jfr.eventTimeToNanos(trace.time), jfr.eventTimeToNanos(trace.time + trace.duration));
        }
    }
    jfr.rewind();

    // Without any MethodTrace event the latency filter cannot be applied
    if (!sawMethodTrace) {
        throw new RuntimeException("No jdk.MethodTrace events found");
    }
    return builder.build();
}
/**
 * Creates the collector that receives events. The default is an
 * {@code EventAggregator} configured from {@code args.threads} and
 * {@code args.grain}; subclasses may override.
 */
protected EventCollector createCollector(Arguments args) {
    EventAggregator aggregator = new EventAggregator(args.threads, args.grain);
    return aggregator;
}
/**
 * Reads the events of the current chunk and passes the ones that survive all
 * filters (time range, thread state, latency intervals) to the collector.
 *
 * @param timeIntervals latency intervals an event must fall into, or {@code null} for no restriction
 * @throws IOException if reading the recording fails
 */
protected void collectEvents(TimeIntervals timeIntervals) throws IOException {
    // The nativemem check should always come first for the leak detection feature
    Class<? extends Event> eventClass;
    if (args.nativemem) {
        eventClass = MallocEvent.class;
    } else if (args.nativelock) {
        eventClass = NativeLockEvent.class;
    } else if (args.live) {
        eventClass = LiveObject.class;
    } else if (args.alloc) {
        eventClass = AllocationSample.class;
    } else if (args.lock) {
        eventClass = ContendedLock.class;
    } else if (args.trace) {
        eventClass = MethodTrace.class;
    } else {
        eventClass = ExecutionSample.class;
    }

    BitSet threadStates = null;
    if (args.state != null) {
        threadStates = new BitSet();
        String[] stateNames = args.state.toUpperCase().split(",");
        for (String stateName : stateNames) {
            threadStates.set(toThreadState(stateName));
        }
    } else if (args.cpu) {
        threadStates = getThreadStates(true);
    } else if (args.wall) {
        threadStates = getThreadStates(false);
    } else if (args.cpuTime) {
        threadStates = new BitSet();
        threadStates.set(ExecutionSample.CPU_TIME_SAMPLE);
    }

    // A zero bound means "not specified", so the range stays open on that side
    long startTicks = args.from != 0 ? toTicks(args.from) : Long.MIN_VALUE;
    long endTicks = args.to != 0 ? toTicks(args.to) : Long.MAX_VALUE;

    Event event;
    while ((event = jfr.readEvent(eventClass)) != null) {
        if (event.time < startTicks || event.time > endTicks) {
            continue;
        }
        // NOTE(review): the cast assumes a thread-state filter is only combined with ExecutionSample events
        if (threadStates != null && !threadStates.get(((ExecutionSample) event).threadState)) {
            continue;
        }
        if (timeIntervals != null && !timeIntervals.contains(jfr.eventTimeToNanos(event.time))) {
            continue;
        }
        collector.collect(event);
    }
}
/**
 * Hook called by {@link #convert()} after the events of a chunk have been
 * collected, and once more if the collector's {@code finish()} returns true.
 * The default implementation does nothing.
 */
protected void convertChunk() {
// To be overridden in subclasses
}
/**
 * Decides whether a sample must be dropped according to the include/exclude patterns.
 * <p>
 * The thread name (only when {@code args.threads} is set), the class name
 * (only when {@code classId} is non-zero) and every frame of the stack trace
 * are tested in that order. Any match of the exclude pattern drops the sample.
 * The include pattern needs one match anywhere; once matched it is no longer checked.
 * <p>
 * A stack trace ID that is not present in the recording contributes no frames
 * instead of failing with a NullPointerException.
 *
 * @param stackId  stack trace ID of the sample
 * @param threadId thread ID of the sample
 * @param classId  class ID attached to the sample, or 0 if none
 * @return {@code true} if the sample must be skipped
 */
protected boolean excludeStack(int stackId, int threadId, long classId) {
    Pattern include = args.include;
    Pattern exclude = args.exclude;
    if (include == null && exclude == null) {
        return false;
    }

    if (args.threads) {
        String threadName = getThreadName(threadId);
        if (exclude != null && exclude.matcher(threadName).matches()) {
            return true;
        }
        if (include != null && include.matcher(threadName).matches()) {
            if (exclude == null) return false;
            include = null;
        }
    }

    if (classId != 0) {
        String className = getClassName(classId);
        if (exclude != null && exclude.matcher(className).matches()) {
            return true;
        }
        if (include != null && include.matcher(className).matches()) {
            if (exclude == null) return false;
            include = null;
        }
    }

    StackTrace stackTrace = jfr.stackTraces.get(stackId);
    if (stackTrace != null) {
        for (int i = 0; i < stackTrace.methods.length; i++) {
            String name = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
            if (exclude != null && exclude.matcher(name).matches()) {
                return true;
            }
            if (include != null && include.matcher(name).matches()) {
                if (exclude == null) return false;
                include = null;
            }
        }
    }

    // A still-pending include pattern means nothing matched it
    return include != null;
}
/**
 * Looks up the numeric ID of a thread state in the recording's
 * {@code jdk.types.ThreadState} enum.
 * The comparison skips the first 6 characters of each enum value
 * (presumably the "STATE_" prefix) and accepts {@code name} as a prefix of the rest.
 *
 * @throws IllegalArgumentException if no enum value matches
 */
protected int toThreadState(String name) {
    Map<Integer, String> knownStates = jfr.enums.get("jdk.types.ThreadState");
    if (knownStates != null) {
        for (Map.Entry<Integer, String> candidate : knownStates.entrySet()) {
            String stateName = candidate.getValue();
            if (stateName.startsWith(name, 6)) {
                return candidate.getKey();
            }
        }
    }
    throw new IllegalArgumentException("Unknown thread state: " + name);
}
/**
 * Builds the set of thread state IDs for a CPU or wall-clock view.
 * With {@code cpu == true} only the state named "STATE_DEFAULT" is selected;
 * with {@code cpu == false} every other state is selected.
 * The set is empty if the recording has no {@code jdk.types.ThreadState} enum.
 */
protected BitSet getThreadStates(boolean cpu) {
    BitSet selected = new BitSet();
    Map<Integer, String> knownStates = jfr.enums.get("jdk.types.ThreadState");
    if (knownStates == null) {
        return selected;
    }
    for (Map.Entry<Integer, String> state : knownStates.entrySet()) {
        boolean isDefault = "STATE_DEFAULT".equals(state.getValue());
        selected.set(state.getKey(), isDefault == cpu);
    }
    return selected;
}
/**
 * Converts a time given in milliseconds to ticks of the current chunk.
 * {@code millis} can be an absolute timestamp or an offset from the
 * beginning/end of the recording: negative values are relative to the end,
 * values below 1500000000000 are relative to the start, anything else is
 * taken as an absolute timestamp.
 */
protected long toTicks(long millis) {
    long base;
    if (millis < 0) {
        base = jfr.endNanos;
    } else if (millis < 1500000000000L) {
        base = jfr.startNanos;
    } else {
        base = 0;
    }
    long nanos = millis * 1_000_000 + base;

    double ticksPerNano = jfr.ticksPerSec / 1e9;
    long ticksFromChunkStart = (long) ((nanos - jfr.chunkStartNanos) * ticksPerNano);
    return ticksFromChunkStart + jfr.chunkStartTicks;
}
/**
 * Returns the display name of a method, resolving it on first use and
 * caching the result in {@code methodNames}.
 */
@Override
public String getMethodName(long methodId, byte methodType) {
    String cached = methodNames.get(methodId);
    if (cached != null) {
        return cached;
    }
    String resolved = resolveMethodName(methodId, methodType);
    methodNames.put(methodId, resolved);
    return resolved;
}
/**
 * Builds the display name of a method from the recording's constant pools.
 * <p>
 * Native frames and methods without a class name yield the bare method name;
 * otherwise the result is {@code Class.method}, or just the class name when
 * the method name is empty.
 * <p>
 * Missing pool entries no longer cause a NullPointerException: a missing class
 * reference is treated like a missing class name, and a missing method name in
 * the native branch yields "unknown", the same value used for an unknown method ID.
 *
 * @param methodId   ID of the method in {@code jfr.methods}
 * @param methodType frame type, used to detect native frames
 * @return the resolved name, never {@code null}
 */
private String resolveMethodName(long methodId, byte methodType) {
    MethodRef method = jfr.methods.get(methodId);
    if (method == null) {
        return "unknown";
    }

    ClassRef cls = jfr.classes.get(method.cls);
    byte[] className = cls == null ? null : jfr.symbols.get(cls.name);
    byte[] methodName = jfr.symbols.get(method.name);

    if (className == null || className.length == 0 || isNativeFrame(methodType)) {
        return methodName == null ? "unknown" : new String(methodName, StandardCharsets.UTF_8);
    }

    String classStr = toJavaClassName(className, 0, args.dot);
    if (methodName == null || methodName.length == 0) {
        return classStr;
    }
    String methodStr = new String(methodName, StandardCharsets.UTF_8);
    return classStr + '.' + methodStr;
}
/**
 * Returns the dotted Java name of a class. Each leading '[' of the symbol
 * becomes a trailing "[]" in the result.
 *
 * @return the class name, or the string "null" when the class ID is unknown
 */
public String getClassName(long classId) {
    ClassRef cls = jfr.classes.get(classId);
    if (cls == null) {
        return "null";
    }

    byte[] symbol = jfr.symbols.get(cls.name);
    int dimensions = 0;
    while (symbol[dimensions] == '[') {
        dimensions++;
    }

    String name = toJavaClassName(symbol, dimensions, true);
    for (int i = 0; i < dimensions; i++) {
        name = name.concat("[]");
    }
    return name;
}
/**
 * Converts a class symbol to a printable class name.
 *
 * @param symbol raw bytes of the class name symbol
 * @param start  offset of the first byte to use; a value greater than 0 makes the
 *               byte at that offset be interpreted as a type code (getClassName
 *               passes the number of leading '[' characters here)
 * @param dotted whether '/' separators are replaced with '.'
 * @return the class name, shortened according to {@code args.norm} and {@code args.simple}
 */
private String toJavaClassName(byte[] symbol, int start, boolean dotted) {
int end = symbol.length;
if (start > 0) {
// Single-letter codes map to primitive type names
switch (symbol[start]) {
case 'B':
return "byte";
case 'C':
return "char";
case 'S':
return "short";
case 'I':
return "int";
case 'J':
return "long";
case 'Z':
return "boolean";
case 'F':
return "float";
case 'D':
return "double";
case 'L':
// Drop the leading 'L' and the last byte (presumably the trailing ';' of an object descriptor)
start++;
end--;
}
}
if (args.norm) {
// Look only at the last '/' or '.' separator: if a digit follows it, cut the name there
for (int i = end - 2; i > start; i--) {
if (symbol[i] == '/' || symbol[i] == '.') {
if (symbol[i + 1] >= '0' && symbol[i + 1] <= '9') {
end = i;
if (i > start + 19 && symbol[i - 19] == '+' && symbol[i - 18] == '0') {
// Original JFR transforms lambda names to something like
// pkg.ClassName$$Lambda+0x00007f8177090218/543846639
end = i - 19;
}
}
break;
}
}
}
if (args.simple) {
// Keep only what follows the last '/' that is not followed by a digit
for (int i = end - 2; i >= start; i--) {
if (symbol[i] == '/' && (symbol[i + 1] < '0' || symbol[i + 1] > '9')) {
start = i + 1;
break;
}
}
}
String s = new String(symbol, start, end - start, StandardCharsets.UTF_8);
return dotted ? s.replace('/', '.') : s;
}
/**
 * Builds a {@link StackTraceElement} for a frame.
 * <p>
 * The class name is empty for native frames and when the class name symbol is
 * missing or empty. A method whose class reference is absent from the
 * recording is handled the same way instead of failing with a NullPointerException.
 *
 * @param methodId   ID of the method in {@code jfr.methods}
 * @param methodType frame type, used to detect native frames
 * @param location   packed location; the upper 16 bits are used as the line number
 * @return the element, or one named "unknown" with line 0 when the method ID is not found
 */
public StackTraceElement getStackTraceElement(long methodId, byte methodType, int location) {
    MethodRef method = jfr.methods.get(methodId);
    if (method == null) {
        return new StackTraceElement("", "unknown", null, 0);
    }

    ClassRef cls = jfr.classes.get(method.cls);
    byte[] className = cls == null ? null : jfr.symbols.get(cls.name);
    byte[] methodName = jfr.symbols.get(method.name);

    String classStr = className == null || className.length == 0 || isNativeFrame(methodType) ? "" :
            toJavaClassName(className, 0, args.dot);
    String methodStr = methodName == null || methodName.length == 0 ? "" :
            new String(methodName, StandardCharsets.UTF_8);
    return new StackTraceElement(classStr, methodStr, null, location >>> 16);
}
/**
 * Returns a bracketed label for a thread: {@code [tid=N]} when the name is
 * unknown, the stored name unchanged when it already starts with "[tid=",
 * and {@code [name tid=N]} otherwise.
 */
public String getThreadName(int tid) {
    String threadName = jfr.threads.get(tid);
    if (threadName == null) {
        return "[tid=" + tid + ']';
    }
    if (threadName.startsWith("[tid=")) {
        return threadName;
    }
    return '[' + threadName + " tid=" + tid + ']';
}
/**
 * Tells whether a frame type denotes a non-Java frame.
 * <p>
 * In JDK Flight Recorder, TYPE_NATIVE denotes Java native methods, while in
 * async-profiler, TYPE_NATIVE is for C methods. TYPE_NATIVE therefore counts
 * as native only when the recording's {@code jdk.types.FrameType} enum defines
 * a value for TYPE_KERNEL.
 */
protected boolean isNativeFrame(byte methodType) {
    if (methodType == TYPE_CPP || methodType == TYPE_KERNEL) {
        return true;
    }
    return methodType == TYPE_NATIVE
            && jfr.getEnumValue("jdk.types.FrameType", TYPE_KERNEL) != null;
}
/**
 * Returns the label describing what the profile measures, chosen from the
 * event selection flags in {@code args}.
 */
public String getValueType() {
    // NOTE(review): args.nativelock falls through to "cpu" here although counterFactor() treats it like a lock profile - confirm intended
    return args.nativemem ? "malloc"
            : (args.alloc || args.live) ? "allocations"
            : args.lock ? "locks"
            : "cpu";
}
/**
 * Returns the unit label used when values are sample counts; always "count".
 */
public String getSampleUnits() {
return "count";
}
/**
 * Returns the unit label used when values are totals: "bytes" for the
 * nativemem, alloc and live modes, "nanoseconds" otherwise.
 */
public String getTotalUnits() {
    boolean memoryProfile = args.nativemem || args.alloc || args.live;
    return memoryProfile ? "bytes" : "nanoseconds";
}
/**
 * Returns the multiplier applied to event values: {@code jfr.nanosPerTick}
 * for lock and nativelock profiles, 1.0 for everything else.
 */
public double counterFactor() {
    if (args.lock || args.nativelock) {
        return jfr.nanosPerTick;
    }
    return 1.0;
}
/**
 * Visitor adapter that selects sum(samples) or sum(value) depending on the
 * --total option. For lock and nativelock events, the lock duration is
 * converted from ticks to nanoseconds through {@link #counterFactor()}.
 */
protected abstract class AggregatedEventVisitor implements EventCollector.Visitor {
    // 0.0 is a marker meaning "report sample counts"
    private final double factor = args.total ? counterFactor() : 0.0;

    @Override
    public final void visit(Event event, long samples, long value) {
        long reported;
        if (factor == 0.0) {
            reported = samples;
        } else if (factor == 1.0) {
            reported = value;
        } else {
            reported = (long) (value * factor);
        }
        visit(event, reported);
    }

    protected abstract void visit(Event event, long value);
}
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.AllocationSample;
import one.jfr.event.Event;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import static one.convert.Frame.*;
/**
 * Builds an HTML Flame Graph from .jfr output.
 */
public class JfrToFlame extends JfrConverter {
    private final FlameGraph fg;

    public JfrToFlame(JfrReader jfr, Arguments args) {
        super(jfr, args);
        this.fg = new FlameGraph(args);
    }

    /**
     * Adds every aggregated event of the current chunk to the flame graph.
     * Frames are pushed from the last stack trace entry to the first, optionally
     * preceded by the thread name and category and followed by the class name.
     * Events whose stack trace is not found are skipped.
     */
    @Override
    protected void convertChunk() {
        collector.forEach(new AggregatedEventVisitor() {
            final CallStack stack = new CallStack();

            @Override
            public void visit(Event event, long value) {
                StackTrace stackTrace = jfr.stackTraces.get(event.stackTraceId);
                if (stackTrace == null) {
                    return;
                }

                Arguments opts = JfrToFlame.this.args;
                long[] methods = stackTrace.methods;
                byte[] types = stackTrace.types;
                int[] locations = stackTrace.locations;

                if (opts.threads) {
                    stack.push(getThreadName(event.tid), TYPE_NATIVE);
                }
                if (opts.classify) {
                    Classifier.Category category = getCategory(stackTrace);
                    stack.push(category.title, category.type);
                }

                for (int i = methods.length - 1; i >= 0; i--) {
                    String frameTitle = getMethodName(methods[i], types[i]);
                    // The upper 16 bits of a location hold the line number, the lower 16 bits the bytecode index
                    int lineNumber = opts.lines ? locations[i] >>> 16 : 0;
                    if (lineNumber != 0) {
                        frameTitle += ":" + lineNumber;
                    } else {
                        int bci = opts.bci ? locations[i] & 0xffff : 0;
                        if (bci != 0) {
                            frameTitle += "@" + bci;
                        }
                    }
                    stack.push(frameTitle, types[i]);
                }

                long classId = event.classId();
                if (classId != 0) {
                    String className = getClassName(classId);
                    // Allocation samples with a zero TLAB size get a different frame type
                    boolean zeroTlab = event instanceof AllocationSample
                            && ((AllocationSample) event).tlabSize == 0;
                    stack.push(className, zeroTlab ? TYPE_KERNEL : TYPE_INLINED);
                }

                fg.addSample(stack, value);
                stack.clear();
            }
        });
    }

    /** Writes the flame graph to {@code out}. */
    public void dump(OutputStream out) throws IOException {
        fg.dump(out);
    }

    /**
     * Converts a .jfr file into an in-memory flame graph.
     *
     * @throws IOException if the recording cannot be read
     */
    public static FlameGraph parse(String input, Arguments args) throws IOException {
        try (JfrReader reader = new JfrReader(input)) {
            JfrToFlame converter = new JfrToFlame(reader, args);
            converter.convert();
            return converter.fg;
        }
    }

    /**
     * Converts a .jfr file and writes the flame graph to {@code output} as UTF-8.
     *
     * @throws IOException if reading or writing fails
     */
    public static void convert(String input, String output, Arguments args) throws IOException {
        FlameGraph graph = parse(input, args);
        try (PrintStream target = new PrintStream(output, "UTF-8")) {
            graph.dump(target);
        }
    }
}

View File

@@ -0,0 +1,112 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import one.heatmap.Heatmap;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.AllocationSample;
import one.jfr.event.ContendedLock;
import one.jfr.event.Event;
import one.jfr.event.EventCollector;
import java.io.*;
import static one.convert.Frame.TYPE_INLINED;
import static one.convert.Frame.TYPE_KERNEL;
/**
 * Feeds the events of a .jfr recording into a {@code Heatmap}.
 */
public class JfrToHeatmap extends JfrConverter {
    private final Heatmap heatmap;

    public JfrToHeatmap(JfrReader jfr, Arguments args) {
        super(jfr, args);
        this.heatmap = new Heatmap(args, this);
    }

    /**
     * Returns a collector that forwards every event straight to the heatmap
     * instead of aggregating; its {@code forEach} must never be called.
     */
    @Override
    protected EventCollector createCollector(Arguments args) {
        return new EventCollector() {
            // Cached per chunk; 0 means "not read yet"
            long wallInterval;

            private long getWallInterval() {
                if (wallInterval != 0) {
                    return wallInterval;
                }
                String wall = jfr.settings.get("wall");
                String setting = wall != null ? wall : jfr.settings.get("interval");
                long interval = Long.parseLong(setting);
                // A zero setting falls back to 50_000_000
                wallInterval = interval != 0 ? interval : 50_000_000;
                return wallInterval;
            }

            @Override
            public void collect(Event event) {
                int classId = 0;
                byte type = 0;
                if (event instanceof AllocationSample) {
                    AllocationSample alloc = (AllocationSample) event;
                    classId = alloc.classId;
                    type = alloc.tlabSize == 0 ? TYPE_KERNEL : TYPE_INLINED;
                } else if (event instanceof ContendedLock) {
                    classId = ((ContendedLock) event).classId;
                    type = TYPE_KERNEL;
                }

                long timeNs = jfr.eventTimeToNanos(event.time);
                for (long remaining = event.samples(); ; ) {
                    heatmap.addEvent(event.stackTraceId, event.tid, classId, type, timeNs / 1_000_000);
                    if (--remaining <= 0) {
                        break;
                    }
                    // Only wall clock events can have samples > 1
                    timeNs += getWallInterval();
                }
            }

            @Override
            public void beforeChunk() {
                heatmap.beforeChunk();
                jfr.stackTraces.forEach(new Dictionary.Visitor<StackTrace>() {
                    @Override
                    public void visit(long id, StackTrace stackTrace) {
                        int depth = stackTrace.methods.length;
                        heatmap.addStack(id, stackTrace.methods, stackTrace.locations, stackTrace.types, depth);
                    }
                });
            }

            @Override
            public void afterChunk() {
                jfr.stackTraces.clear();
                wallInterval = 0;
            }

            @Override
            public boolean finish() {
                heatmap.finish(jfr.startNanos / 1_000_000);
                return false;
            }

            @Override
            public void forEach(Visitor visitor) {
                throw new AssertionError("Should not be called");
            }
        };
    }

    /**
     * Writes the heatmap to {@code out} as UTF-8. The stream is closed afterwards.
     */
    public void dump(OutputStream out) throws IOException {
        try (PrintStream printer = new PrintStream(out, false, "UTF-8")) {
            heatmap.dump(printer);
        }
    }

    /**
     * Converts a .jfr file to a heatmap file. The recording is closed before
     * the output file is opened.
     *
     * @throws IOException if reading or writing fails
     */
    public static void convert(String input, String output, Arguments args) throws IOException {
        JfrToHeatmap converter;
        try (JfrReader reader = new JfrReader(input)) {
            converter = new JfrToHeatmap(reader, args);
            converter.convert();
        }

        try (OutputStream target = new BufferedOutputStream(new FileOutputStream(output))) {
            converter.dump(target);
        }
    }
}

View File

@@ -0,0 +1,334 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import static one.convert.OtlpConstants.*;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.*;
import one.proto.Proto;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;
/**
* Converts .jfr output to OpenTelemetry protocol.
*/
public class JfrToOtlp extends JfrConverter {
// Size in bytes to be allocated in the buffer to hold the varint containing the length of the message
private static final int MSG_LARGE = 5;
private static final int MSG_SMALL = 1;
private final Index<String> stringPool = new Index<>(String.class, "");
private final Index<String> functionPool = new Index<>(String.class, "");
private final Index<Line> linePool = new Index<>(Line.class, Line.EMPTY);
private final Index<KeyValue> attributesPool = new Index<>(KeyValue.class, KeyValue.EMPTY);
private final Index<IntArray> stacksPool = new Index<>(IntArray.class, IntArray.EMPTY);
private final int threadNameIndex = stringPool.index(OTLP_THREAD_NAME);
private final Dictionary<AggregatedEvent> aggregatedEvents = new Dictionary<>();
// Chunk-private cache to remember mappings from stacktrace ID to OTLP stack index
private final Map<Integer, Integer> stacksIndexCache = new HashMap<>();
private double chunkCounterFactor;
private final Proto proto = new Proto(1024);
/**
 * Creates a converter for the given recording; all setup is delegated to the superclass.
 */
public JfrToOtlp(JfrReader jfr, Arguments args) {
super(jfr, args);
}
/**
 * Writes the serialized message accumulated in {@code proto} to {@code out}.
 *
 * @throws IOException if the stream rejects the write
 */
public void dump(OutputStream out) throws IOException {
    byte[] encoded = proto.buffer();
    int length = proto.size();
    out.write(encoded, 0, length);
}
/**
 * Creates a collector that groups the events of a chunk by (thread ID, stack
 * trace ID) and keeps the timestamp and value of each individual event.
 * The grouped events are consumed by {@code convertChunk()}; {@code forEach}
 * is not supported.
 */
@Override
protected EventCollector createCollector(Arguments args) {
    return new EventCollector() {
        @Override
        public void beforeChunk() {
            chunkCounterFactor = counterFactor();
            aggregatedEvents.clear();
            stacksIndexCache.clear();
        }

        @Override
        public void collect(Event e) {
            if (excludeStack(e.stackTraceId, e.tid, 0)) {
                return;
            }

            // Thread ID in the upper 32 bits, stack trace ID in the lower 32 bits.
            // The mask stops a negative stack trace ID from sign-extending over the thread ID.
            long key = ((long) e.tid) << 32 | (e.stackTraceId & 0xFFFFFFFFL);
            AggregatedEvent ec = aggregatedEvents.get(key);
            if (ec == null) {
                ec = new AggregatedEvent();
                aggregatedEvents.put(key, ec);
            }

            // Sample count unless --total is set; totals are scaled by the chunk's counter factor
            long recordedValue = !args.total ? e.samples()
                    : chunkCounterFactor == 1.0 ? e.value()
                    : (long) (e.value() * chunkCounterFactor);
            ec.recordEvent(getUnixTimestampNanos(e.time), recordedValue);
        }

        // Converts a tick-based event time to nanoseconds using the chunk start as the reference point
        private long getUnixTimestampNanos(long jfrTimestamp) {
            long nanosFromStart = (long) ((jfrTimestamp - jfr.chunkStartTicks) * jfr.nanosPerTick);
            return jfr.chunkStartNanos + nanosFromStart;
        }

        @Override
        public void afterChunk() {}

        @Override
        public boolean finish() {
            aggregatedEvents.clear();
            stacksIndexCache.clear();
            return false;
        }

        @Override
        public void forEach(Visitor visitor) {
            throw new UnsupportedOperationException("Not supported");
        }
    };
}
/**
 * Wraps the per-chunk profiles written by the superclass in the
 * resource_profiles / scope_profiles fields, then appends the shared dictionary.
 *
 * @throws IOException if reading the recording fails
 */
@Override
public void convert() throws IOException {
    long resourceProfilesMark = proto.startField(PROFILES_DATA_resource_profiles, MSG_LARGE);
    long scopeProfilesMark = proto.startField(RESOURCE_PROFILES_scope_profiles, MSG_LARGE);

    super.convert();

    // Fields are closed innermost first
    proto.commitField(scopeProfilesMark);
    proto.commitField(resourceProfilesMark);

    writeProfileDictionary();
}
/**
 * Writes one profile for the current chunk: sample type, start time, duration,
 * and one sample per (thread, stack trace) group collected for the chunk.
 */
@Override
protected void convertChunk() {
    long profileMark = proto.startField(SCOPE_PROFILES_profiles, MSG_LARGE);

    long sampleTypeMark = proto.startField(PROFILE_sample_type, MSG_SMALL);
    String unit = args.total ? getTotalUnits() : getSampleUnits();
    proto.field(VALUE_TYPE_type_strindex, stringPool.index(getValueType()));
    proto.field(VALUE_TYPE_unit_strindex, stringPool.index(unit));
    proto.commitField(sampleTypeMark);

    proto.fieldFixed64(PROFILE_time_unix_nano, jfr.chunkStartNanos);
    proto.field(PROFILE_duration_nanos, jfr.chunkDurationNanos());

    // Keys carry the thread ID in the upper 32 bits and the stack trace ID in the lower 32 bits
    aggregatedEvents.forEach((key, events) -> writeSample((int) key, (int) (key >> 32), events));

    proto.commitField(profileMark);
}
/**
 * Translates a JFR stack trace into an array of line-pool indices, one per frame.
 * <p>
 * A stack trace ID that is not present in the recording maps to
 * {@code IntArray.EMPTY} (index 0 of the stack pool) instead of failing with
 * a NullPointerException.
 *
 * @param stackTraceId ID of the stack trace in {@code jfr.stackTraces}
 * @return the frames as line-pool indices
 */
private IntArray makeStack(int stackTraceId) {
    StackTrace st = jfr.stackTraces.get(stackTraceId);
    if (st == null) {
        return IntArray.EMPTY;
    }
    int[] stack = new int[st.methods.length];
    for (int i = 0; i < st.methods.length; ++i) {
        stack[i] = linePool.index(makeLine(st, i));
    }
    return new IntArray(stack);
}
/**
 * Builds the {@code Line} for frame {@code i} of a stack trace: the function
 * pool index of the method name plus the line number taken from the upper
 * 16 bits of the frame's location.
 */
private Line makeLine(StackTrace stackTrace, int i) {
    String frameName = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
    int sourceLine = stackTrace.locations[i] >>> 16;
    return new Line(functionPool.index(frameName), sourceLine);
}
/**
 * Writes one sample: the stack index, the thread-name attribute, and the
 * timestamps and values of all events recorded for this (thread, stack) pair.
 */
private void writeSample(int stackTraceId, int tid, AggregatedEvent ae) {
    int count = ae.eventsCount;

    // 24 is the sum of:
    // 4 tags: 1 byte
    // 5 * 2: max size of thread name and stack idx
    // 5 * 2: max size of timestamps/values arrays
    int maxLengthBytes = varintSize(24 + count * (8 /* fixed64 */ + 10 /* max varint */));
    long sampleMark = proto.startField(PROFILE_samples, maxLengthBytes);

    // The cache avoids rebuilding the stack for every thread that shares it
    Integer stackIndex = stacksIndexCache.computeIfAbsent(stackTraceId, id -> stacksPool.index(makeStack(id)));
    proto.field(SAMPLE_stack_index, stackIndex);

    KeyValue threadAttribute = new KeyValue(threadNameIndex, getThreadName(tid));
    proto.field(SAMPLE_attribute_indices, attributesPool.index(threadAttribute));

    long timestampsMark = proto.startField(SAMPLE_timestamps_unix_nano, varintSize(8 * count));
    for (int i = 0; i < count; ++i) {
        proto.writeFixed64(ae.timestamps[i]);
    }
    proto.commitField(timestampsMark);

    long valuesMark = proto.startField(SAMPLE_values, varintSize(10 * count));
    for (int i = 0; i < count; ++i) {
        proto.writeLong(ae.values[i]);
    }
    proto.commitField(valuesMark);

    proto.commitField(sampleMark);
}
/**
 * Returns the number of bytes (1 to 10) a value occupies as a base-128 varint.
 * Zero is treated like one, so the result is never 0.
 */
private static int varintSize(long value) {
    int leadingZeros = Long.numberOfLeadingZeros(value | 1);
    return (640 - leadingZeros * 9) / 64;
}
/**
 * Writes the dictionary message: mapping, function, location, stack, string
 * and attribute tables, in that order.
 * <p>
 * The order matters: writing the function table adds names to
 * {@code stringPool}, so the string table has to be written after it.
 */
private void writeProfileDictionary() {
long profilesDictionaryMark = proto.startField(PROFILES_DATA_dictionary, MSG_LARGE);
// Mapping[0] must be a default mapping according to the spec
long mMark = proto.startField(PROFILES_DICTIONARY_mapping_table, MSG_SMALL);
proto.commitField(mMark);
// One function entry per pooled method name, in pool order
for (String name : functionPool.keys()) {
long fMark = proto.startField(PROFILES_DICTIONARY_function_table, MSG_SMALL);
proto.field(FUNCTION_name_strindex, stringPool.index(name));
proto.commitField(fMark);
}
// One location per pooled line; every location refers to mapping 0
for (Line line : linePool.keys()) {
long locMark = proto.startField(PROFILES_DICTIONARY_location_table, MSG_SMALL);
proto.field(LOCATION_mapping_index, 0);
long lineMark = proto.startField(LOCATION_line, MSG_SMALL);
proto.field(LINE_function_index, line.functionIdx);
proto.field(LINE_lines, line.lineNumber);
proto.commitField(lineMark);
proto.commitField(locMark);
}
// Each stack is written as a run of location indices
for (IntArray stack : stacksPool.keys()) {
long stackMark = proto.startField(PROFILES_DICTIONARY_stack_table, MSG_LARGE);
long locationIndicesMark = proto.startField(STACK_location_indices, MSG_LARGE);
for (int locationIdx : stack.array) {
proto.writeInt(locationIdx);
}
proto.commitField(locationIndicesMark);
proto.commitField(stackMark);
}
// Written after the function table, which may still add strings to the pool
for (String s : stringPool.keys()) {
proto.field(PROFILES_DICTIONARY_string_table, s);
}
// Attributes hold a key string index and a string value
for (KeyValue kv : attributesPool.keys()) {
long aMark = proto.startField(PROFILES_DICTIONARY_attribute_table, MSG_LARGE);
proto.field(KEY_VALUE_AND_UNIT_key_strindex, kv.keyStrindex);
long vMark = proto.startField(KEY_VALUE_AND_UNIT_value, MSG_LARGE);
proto.field(ANY_VALUE_string_value, kv.value);
proto.commitField(vMark);
proto.commitField(aMark);
}
proto.commitField(profilesDictionaryMark);
}
/**
 * Converts a .jfr file to an OTLP file. The recording is closed before the
 * output file is opened.
 *
 * @throws IOException if reading or writing fails
 */
public static void convert(String input, String output, Arguments args) throws IOException {
    JfrToOtlp converter;
    try (JfrReader reader = new JfrReader(input)) {
        converter = new JfrToOtlp(reader, args);
        converter.convert();
    }

    try (FileOutputStream target = new FileOutputStream(output)) {
        converter.dump(target);
    }
}
/**
 * Immutable (function index, line number) pair with value semantics.
 */
private static final class Line {
    static final Line EMPTY = new Line(0, 0);

    final int functionIdx;
    final int lineNumber;

    Line(int functionIdx, int lineNumber) {
        this.functionIdx = functionIdx;
        this.lineNumber = lineNumber;
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof Line) {
            Line that = (Line) o;
            return that.functionIdx == functionIdx && that.lineNumber == lineNumber;
        }
        return false;
    }

    @Override
    public int hashCode() {
        int hash = 31 * 17 + functionIdx;
        return 31 * hash + lineNumber;
    }
}
/**
 * Immutable attribute: a key string index paired with a string value.
 */
private static final class KeyValue {
    static final KeyValue EMPTY = new KeyValue(0, "");

    final int keyStrindex;
    // Only string value is fine for now
    final String value;

    KeyValue(int keyStrindex, String value) {
        this.keyStrindex = keyStrindex;
        this.value = value;
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof KeyValue) {
            KeyValue that = (KeyValue) o;
            return keyStrindex == that.keyStrindex && value.equals(that.value);
        }
        return false;
    }

    @Override
    public int hashCode() {
        int hash = 31 * 17 + keyStrindex;
        return 31 * hash + value.hashCode();
    }
}
/**
 * Wrapper that gives an {@code int[]} content-based equality and a hash code
 * computed once at construction.
 */
private static final class IntArray {
    static final IntArray EMPTY = new IntArray(new int[0]);

    final int[] array;
    final int hash;

    IntArray(int[] array) {
        this.array = array;
        this.hash = Arrays.hashCode(array);
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof IntArray)) {
            return false;
        }
        IntArray that = (IntArray) o;
        return Arrays.equals(array, that.array);
    }

    @Override
    public int hashCode() {
        return hash;
    }
}
/**
 * Growable pair of parallel arrays holding the timestamp and value of each recorded event.
 */
private static final class AggregatedEvent {
    long[] timestamps = new long[1];
    long[] values = new long[1];
    int eventsCount = 0;

    /** Appends one event, doubling both arrays when they are full. */
    public void recordEvent(long timestamp, long value) {
        int slot = eventsCount;
        if (slot == timestamps.length) {
            int grown = timestamps.length * 2;
            timestamps = Arrays.copyOf(timestamps, grown);
            values = Arrays.copyOf(values, grown);
        }
        timestamps[slot] = timestamp;
        values[slot] = value;
        eventsCount = slot + 1;
    }
}
}

View File

@@ -0,0 +1,157 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.Event;
import one.proto.Proto;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Converts .jfr output to <a href="https://github.com/google/pprof">pprof</a>.
 * <p>
 * Samples are appended to the profile as chunks are converted; the mapping,
 * location, function and string tables are appended by {@link #dump}.
 */
public class JfrToPprof extends JfrConverter {
    private final Proto profile = new Proto(100000);
    private final Index<String> strings = new Index<>(String.class, "");
    private final Index<String> functions = new Index<>(String.class, "");
    private final Index<Long> locations = new Index<>(Long.class, 0L);

    public JfrToPprof(JfrReader jfr, Arguments args) {
        super(jfr, args);
        String unit = args.total ? getTotalUnits() : getSampleUnits();
        profile.field(1, valueType(getValueType(), unit))
                .field(13, strings.index("Produced by async-profiler"));
    }

    /** Appends one sample per aggregated event of the chunk, skipping filtered stacks. */
    @Override
    protected void convertChunk() {
        collector.forEach(new AggregatedEventVisitor() {
            // Reused buffer, reset after every sample
            final Proto s = new Proto(100);

            @Override
            public void visit(Event event, long value) {
                boolean filteredOut = excludeStack(event.stackTraceId, event.tid, event.classId());
                if (!filteredOut) {
                    profile.field(2, sample(s, event, value));
                    s.reset();
                }
            }
        });
    }

    /**
     * Appends the lookup tables and timing fields to the profile and writes
     * the whole message to {@code out}.
     *
     * @throws IOException if the stream rejects the write
     */
    public void dump(OutputStream out) throws IOException {
        profile.field(3, mapping(1, 0, Long.MAX_VALUE, "async-profiler"));

        // Entry 0 of the location and function pools is the empty placeholder and is not emitted
        Long[] locationKeys = this.locations.keys();
        for (int id = 1; id < locationKeys.length; id++) {
            profile.field(4, location(id, locationKeys[id]));
        }

        String[] functionNames = this.functions.keys();
        for (int id = 1; id < functionNames.length; id++) {
            profile.field(5, function(id, functionNames[id]));
        }

        // Written last among the tables: function() above may still add strings
        String[] stringTable = this.strings.keys();
        for (String entry : stringTable) {
            profile.field(6, entry);
        }

        profile.field(9, jfr.startNanos)
                .field(10, jfr.durationNanos());

        out.write(profile.buffer(), 0, profile.size());
    }

    /**
     * Fills {@code s} with one sample: location IDs (class frame first, if
     * any, then the stack frames), the value, and optional thread/category labels.
     */
    private Proto sample(Proto s, Event event, long value) {
        long packedLocations = s.startField(1, 3);

        long classId = event.classId();
        if (classId != 0) {
            int function = functions.index(getClassName(classId));
            s.writeInt(locations.index((long) function << 16));
        }

        StackTrace stackTrace = jfr.stackTraces.get(event.stackTraceId);
        if (stackTrace != null) {
            long[] methods = stackTrace.methods;
            byte[] types = stackTrace.types;
            int[] lines = stackTrace.locations;
            for (int i = 0; i < methods.length; i++) {
                int function = functions.index(getMethodName(methods[i], types[i]));
                // Location key: function index shifted left by 16, line number in the low 16 bits
                long locationKey = (long) function << 16 | lines[i] >>> 16;
                s.writeInt(locations.index(locationKey));
            }
        }

        s.commitField(packedLocations);
        s.field(2, value);

        if (args.threads && event.tid != 0) {
            s.field(3, label("thread", getThreadName(event.tid)));
        }
        if (args.classify && stackTrace != null) {
            s.field(3, label("category", getCategory(stackTrace).title));
        }
        return s;
    }

    private Proto valueType(String type, String unit) {
        Proto message = new Proto(16);
        return message
                .field(1, strings.index(type))
                .field(2, strings.index(unit));
    }

    private Proto label(String key, String str) {
        Proto message = new Proto(16);
        return message
                .field(1, strings.index(key))
                .field(2, strings.index(str));
    }

    private Proto mapping(int id, long start, long limit, String fileName) {
        Proto message = new Proto(16);
        return message
                .field(1, id)
                .field(2, start)
                .field(3, limit)
                .field(5, strings.index(fileName));
    }

    // Unpacks a location key built in sample(): function index above bit 16, line number below
    private Proto location(int id, long location) {
        Proto message = new Proto(16);
        return message
                .field(1, id)
                .field(4, line((int) (location >>> 16), (int) location & 0xffff));
    }

    private Proto line(int functionId, int line) {
        Proto message = new Proto(16);
        return message
                .field(1, functionId)
                .field(2, line);
    }

    private Proto function(int id, String name) {
        Proto message = new Proto(16);
        return message
                .field(1, id)
                .field(2, strings.index(name));
    }

    /**
     * Converts a .jfr file to a pprof file. The output is gzip-compressed when
     * {@code args.output} ends with "gz".
     *
     * @throws IOException if reading or writing fails
     */
    public static void convert(String input, String output, Arguments args) throws IOException {
        JfrToPprof converter;
        try (JfrReader reader = new JfrReader(input)) {
            converter = new JfrToPprof(reader, args);
            converter.convert();
        }

        try (FileOutputStream fos = new FileOutputStream(output);
             OutputStream out = args.output.endsWith("gz") ? new GZIPOutputStream(fos, 4096) : fos) {
            converter.dump(out);
        }
    }
}

View File

@@ -0,0 +1,175 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class Main {
/**
 * Command-line entry point of the converter.
 * <p>
 * The last file argument is the output: either a file or a directory. When it
 * is omitted, the current directory is used. The output format defaults to the
 * extension of the output file, or to "html".
 * <p>
 * With {@code --diff}, exactly two inputs are compared and a single differential
 * flame graph is written; otherwise every input is converted in turn.
 *
 * @param argv command-line arguments
 * @throws Exception if argument validation or any conversion fails
 */
public static void main(String[] argv) throws Exception {
    Arguments args = new Arguments(argv);
    if (args.help || args.files.isEmpty()) {
        usage();
        return;
    }

    // No explicit output given: write into the current directory
    if (args.files.size() == (args.diff ? 2 : 1)) {
        args.files.add(".");
    }

    int fileCount = args.files.size() - 1;
    String lastFile = args.files.get(fileCount);
    boolean isDirectory = new File(lastFile).isDirectory();

    if (args.output == null) {
        int ext;
        if (!isDirectory && (ext = lastFile.lastIndexOf('.')) > 0) {
            args.output = lastFile.substring(ext + 1);
        } else {
            args.output = "html";
        }
    }

    if (args.diff) {
        if (fileCount != 2) {
            throw new IllegalArgumentException("--diff option requires two input files");
        }
        if (!"html".equals(args.output) && !"collapsed".equals(args.output)) {
            throw new IllegalArgumentException("--diff option requires html or collapsed output format");
        }
        args.norm = true; // don't let random IDs in class names spoil comparison

        String input1 = args.files.get(0);
        String input2 = args.files.get(1);
        String output = isDirectory ? new File(lastFile, replaceExt(input2, "diff." + args.output)).getPath() : lastFile;

        System.out.print("Converting " + getFileName(input2) + " vs " + getFileName(input1) + " -> " + getFileName(output) + " ");
        System.out.flush();

        long startTime = System.nanoTime();
        FlameGraph base = parseFlameGraph(input1, args);
        FlameGraph current = parseFlameGraph(input2, args);
        current.diff(base);
        // Close the output file even if dump() throws
        try (FileOutputStream out = new FileOutputStream(output)) {
            current.dump(out);
        }
        long endTime = System.nanoTime();

        System.out.print("# " + (endTime - startTime) / 1000000 / 1000.0 + " s\n");
        return;
    }

    for (int i = 0; i < fileCount; i++) {
        String input = args.files.get(i);
        String output = isDirectory ? new File(lastFile, replaceExt(input, args.output)).getPath() : lastFile;

        System.out.print("Converting " + getFileName(input) + " -> " + getFileName(output) + " ");
        System.out.flush();

        long startTime = System.nanoTime();
        convert(input, output, args);
        long endTime = System.nanoTime();

        System.out.print("# " + (endTime - startTime) / 1000000 / 1000.0 + " s\n");
    }
}
/**
 * Converts a single input file to the format named by {@code args.output}.
 *
 * <p>Inputs that are not recognized as JFR are handed to {@code FlameGraph.convert}
 * regardless of the requested format. For JFR inputs the converter is selected
 * by the output format.
 *
 * @param input  path of the file to convert
 * @param output path of the file to write
 * @param args   parsed command-line arguments
 * @throws IllegalArgumentException if the input is JFR and the output format is not recognized
 * @throws IOException if reading or writing fails
 */
public static void convert(String input, String output, Arguments args) throws IOException {
    if (!isJfr(input)) {
        FlameGraph.convert(input, output, args);
        return;
    }

    String format = args.output;
    if ("html".equals(format) || "collapsed".equals(format)) {
        JfrToFlame.convert(input, output, args);
        return;
    }
    if ("pprof".equals(format) || "pb".equals(format) || format.endsWith("gz")) {
        JfrToPprof.convert(input, output, args);
        return;
    }
    if ("heatmap".equals(format)) {
        JfrToHeatmap.convert(input, output, args);
        return;
    }
    if ("otlp".equals(format)) {
        JfrToOtlp.convert(input, output, args);
        return;
    }

    throw new IllegalArgumentException("Unrecognized output format: " + format);
}
/**
 * Parses the given input into a {@code FlameGraph}, using the JFR parser for
 * JFR recordings and the plain Flame Graph parser for everything else.
 *
 * @param input path of the file to parse
 * @param args  parsed command-line arguments
 * @return the parsed Flame Graph
 * @throws IOException if the file cannot be read
 */
public static FlameGraph parseFlameGraph(String input, Arguments args) throws IOException {
    if (!isJfr(input)) {
        return FlameGraph.parse(input, args);
    }
    return JfrToFlame.parse(input, args);
}
/**
 * Returns the part of the path after the last platform file separator,
 * or the whole string if it contains no separator.
 */
private static String getFileName(String fileName) {
    int nameStart = fileName.lastIndexOf(File.separatorChar) + 1;
    return fileName.substring(nameStart);
}
/**
 * Builds an output file name from an input path: the directory part is dropped
 * and the extension (if any) is replaced with the one matching the output format.
 * A name without an extension gets the new extension appended.
 * The "heatmap" format is written with the "html" extension.
 */
private static String replaceExt(String fileName, String output) {
    String extension = output;
    if ("heatmap".equals(output)) {
        extension = "html";
    }

    String baseName = fileName.substring(fileName.lastIndexOf(File.separatorChar) + 1);
    int dot = baseName.lastIndexOf('.');
    if (dot < 0) {
        return baseName + '.' + extension;
    }
    // Keep everything up to and including the last dot
    return baseName.substring(0, dot + 1) + extension;
}
/**
 * Tells whether the given file should be treated as a JFR recording.
 *
 * <p>Well-known extensions are decided by name alone, without touching the file:
 * ".jfr" is JFR; ".collapsed", ".txt" and ".csv" are not. Any other file is opened
 * and checked for the 4-byte signature 'F', 'L', 'R', 0 at its beginning.
 *
 * @throws IOException if the file has to be inspected but cannot be opened or read
 */
private static boolean isJfr(String fileName) throws IOException {
    if (fileName.endsWith(".jfr")) {
        return true;
    }
    if (fileName.endsWith(".collapsed") || fileName.endsWith(".txt") || fileName.endsWith(".csv")) {
        return false;
    }

    byte[] magic = new byte[4];
    try (FileInputStream in = new FileInputStream(fileName)) {
        if (in.read(magic) != 4) {
            return false;
        }
    }
    return magic[0] == 'F' && magic[1] == 'L' && magic[2] == 'R' && magic[3] == 0;
}
/**
 * Prints the command-line help to standard output.
 * The text is emitted with a single print call; every line ends with a newline.
 */
private static void usage() {
    String[] lines = {
        "Usage: jfrconv [options] <input> [<input>...] <output>",
        "",
        "Conversion options:",
        " -o --output FORMAT Output format: html, collapsed, pprof, pb.gz, heatmap, otlp",
        " -I --include REGEX Include only stacks with the specified frames",
        " -X --exclude REGEX Exclude stacks with the specified frames",
        " --diff Create differential Flame Graph from two input files",
        "",
        "JFR options:",
        " --cpu CPU profile (ExecutionSample)",
        " --cpu-time CPU profile (CPUTimeSample)",
        " --wall Wall clock profile",
        " --alloc Allocation profile",
        " --live Live object profile",
        " --nativemem malloc profile",
        " --leak Only include memory leaks in nativemem",
        " --tail RATIO Ignore tail allocations for leak profiling (10% by default)",
        " --lock Lock contention profile",
        " --nativelock Native (pthread) lock contention profile",
        " --trace Method traces / latency profile",
        " -t --threads Split stack traces by threads",
        " -s --state LIST Filter thread states: runnable, sleeping",
        " --classify Classify samples into predefined categories",
        " --total Accumulate total value (time, bytes, etc.)",
        " --lines Show line numbers",
        " --bci Show bytecode indices",
        " --simple Simple class names instead of FQN",
        " --norm Normalize names of hidden classes / lambdas",
        " --dot Dotted class names",
        " --from TIME Start time in ms (absolute or relative)",
        " --to TIME End time in ms (absolute or relative)",
        " --latency MS Retain only samples within MethodTraces of at least MS milliseconds",
        "",
        "Flame Graph options:",
        " --title STRING Flame Graph title",
        " --minwidth X Skip frames smaller than X%",
        " --grain X Coarsen Flame Graph to the given grain size",
        " --skip N Skip N bottom frames",
        " -r --reverse Reverse stack traces (defaults to icicle graph)",
        " -i --inverted Toggles the layout for reversed stacktraces from icicle to flamegraph",
        " and for default stacktraces from flamegraph to icicle",
        " --highlight REGEX Highlight frames matching the given pattern",
    };
    System.out.print(String.join("\n", lines) + "\n");
}
}

View File

@@ -0,0 +1,63 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
/**
 * Numeric and string constants used when producing OTLP output.
 *
 * <p>Integer constants are named {@code MESSAGE_field}. NOTE(review): the values look like
 * protobuf field numbers of the corresponding OTLP profiles messages - confirm against
 * the .proto definitions before changing any of them.
 */
final class OtlpConstants {
    static final String OTLP_THREAD_NAME = "thread.name";

    // ProfilesDictionary
    static final int PROFILES_DICTIONARY_mapping_table = 1;
    static final int PROFILES_DICTIONARY_location_table = 2;
    static final int PROFILES_DICTIONARY_function_table = 3;
    static final int PROFILES_DICTIONARY_string_table = 5;
    static final int PROFILES_DICTIONARY_attribute_table = 6;
    static final int PROFILES_DICTIONARY_stack_table = 7;

    // ProfilesData
    static final int PROFILES_DATA_resource_profiles = 1;
    static final int PROFILES_DATA_dictionary = 2;

    // ResourceProfiles
    static final int RESOURCE_PROFILES_scope_profiles = 2;

    // ScopeProfiles
    static final int SCOPE_PROFILES_profiles = 2;

    // Profile
    static final int PROFILE_sample_type = 1;
    static final int PROFILE_samples = 2;
    static final int PROFILE_time_unix_nano = 3;
    static final int PROFILE_duration_nanos = 4;

    // ValueType
    static final int VALUE_TYPE_type_strindex = 1;
    static final int VALUE_TYPE_unit_strindex = 2;
    static final int VALUE_TYPE_aggregation_temporality = 3;

    // Sample
    static final int SAMPLE_stack_index = 1;
    static final int SAMPLE_values = 2;
    static final int SAMPLE_attribute_indices = 3;
    static final int SAMPLE_timestamps_unix_nano = 5;

    // Stack
    static final int STACK_location_indices = 1;

    // Location
    static final int LOCATION_mapping_index = 1;
    static final int LOCATION_line = 3;

    // Line
    static final int LINE_function_index = 1;
    static final int LINE_lines = 2;

    // Function
    static final int FUNCTION_name_strindex = 1;

    // KeyValueAndUnit
    static final int KEY_VALUE_AND_UNIT_key_strindex = 1;
    static final int KEY_VALUE_AND_UNIT_value = 2;

    // AnyValue
    static final int ANY_VALUE_string_value = 1;
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
/**
 * Helpers for loading text resources through this class's class loader
 * and for printing such text piece by piece.
 */
public class ResourceProcessor {

    /**
     * Reads an entire resource and decodes it as UTF-8.
     *
     * @param name resource name, resolved by {@link Class#getResourceAsStream(String)}
     *             relative to this class
     * @return the resource contents
     * @throws IllegalStateException if the resource does not exist or cannot be read;
     *         the underlying {@link IOException} is attached as the cause
     */
    public static String getResource(String name) {
        try (InputStream stream = ResourceProcessor.class.getResourceAsStream(name)) {
            if (stream == null) {
                throw new IOException("No resource found");
            }

            ByteArrayOutputStream result = new ByteArrayOutputStream();
            byte[] buffer = new byte[32768];
            for (int length; (length = stream.read(buffer)) != -1; ) {
                result.write(buffer, 0, length);
            }
            return result.toString("UTF-8");
        } catch (IOException e) {
            // Keep the original failure so that the root cause is visible in stack traces
            throw new IllegalStateException("Can't load resource with name " + name, e);
        }
    }

    /**
     * Prints the part of {@code data} that precedes the first occurrence of {@code till}
     * and returns the part that follows it. The marker itself is neither printed nor returned.
     *
     * @param out  stream to print the leading part to
     * @param data text to split
     * @param till marker to look for
     * @return the remainder of {@code data} after the marker
     * @throws StringIndexOutOfBoundsException if {@code data} does not contain {@code till};
     *         nothing is printed in this case
     */
    public static String printTill(PrintStream out, String data, String till) {
        int index = data.indexOf(till);
        if (index < 0) {
            // Same exception type substring() would raise, but with a message that names the marker
            throw new StringIndexOutOfBoundsException("Marker not found: " + till);
        }
        out.print(data.substring(0, index));
        return data.substring(index + till.length());
    }
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.Arrays;
/**
 * A set of closed intervals {@code [start, end]} over {@code long} instants
 * that answers point-membership queries.
 *
 * <p>Instances are produced by {@link Builder}, which merges overlapping intervals,
 * so the arrays held here describe disjoint intervals sorted by start.
 */
public final class TimeIntervals {
    // Parallel arrays: endIntervals[i] is the inclusive end of the interval starting at startIntervals[i]
    private final long[] startIntervals;
    private final long[] endIntervals;

    private TimeIntervals(long[] startIntervals, long[] endIntervals) {
        this.startIntervals = startIntervals;
        this.endIntervals = endIntervals;
    }

    /**
     * Checks whether the instant lies inside any of the intervals (bounds inclusive).
     */
    public boolean contains(long instant) {
        int found = Arrays.binarySearch(startIntervals, instant);
        if (found >= 0) {
            // The instant is exactly the start of an interval
            return true;
        }

        // For a miss, binarySearch returns -(insertionPoint) - 1;
        // the candidate interval is the one right before the insertion point
        int candidate = -found - 2;
        return candidate >= 0 && instant <= endIntervals[candidate];
    }

    /**
     * Accumulates intervals, keeping them merged so that no two stored intervals overlap.
     */
    public static final class Builder {
        // start -> inclusive end; entries never overlap
        private final TreeMap<Long, Long> intervalsByStart = new TreeMap<>();

        /**
         * Adds the closed interval {@code [startInstant, endInstant]}, merging it with
         * every already known interval it overlaps or touches.
         *
         * @throws IllegalArgumentException if {@code startInstant} is after {@code endInstant}
         */
        public void add(long startInstant, long endInstant) {
            if (endInstant < startInstant) {
                throw new IllegalArgumentException("'startInstant' should not be after 'endInstant'");
            }

            long mergedStart = startInstant;
            long mergedEnd = endInstant;

            // Existing intervals that begin inside the new one are absorbed by it.
            // Only the rightmost of them can reach beyond the new end.
            NavigableMap<Long, Long> absorbed = intervalsByStart.subMap(startInstant, true, endInstant, true);
            Map.Entry<Long, Long> rightmost = absorbed.pollLastEntry();
            if (rightmost != null) {
                mergedEnd = Long.max(rightmost.getValue(), mergedEnd);
            }
            absorbed.clear();

            // The closest interval beginning before the new one may still reach into it
            Map.Entry<Long, Long> previous = intervalsByStart.floorEntry(startInstant);
            if (previous != null && previous.getValue() >= startInstant) {
                intervalsByStart.remove(previous.getKey());
                mergedStart = previous.getKey();
                mergedEnd = Long.max(mergedEnd, previous.getValue());
            }

            intervalsByStart.put(mergedStart, mergedEnd);
        }

        /**
         * Creates a {@link TimeIntervals} snapshot of the intervals added so far.
         */
        public TimeIntervals build() {
            int count = intervalsByStart.size();
            long[] starts = new long[count];
            long[] ends = new long[count];

            int i = 0;
            for (Map.Entry<Long, Long> interval : intervalsByStart.entrySet()) {
                starts[i] = interval.getKey();
                ends[i] = interval.getValue();
                i++;
            }
            return new TimeIntervals(starts, ends);
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More