Compare commits

...

272 Commits

Author SHA1 Message Date
Andrei Pangin
f6ca3c1ff8 dumpOtlp() should accept Counter argument (#1728) 2026-04-15 19:38:08 +01:00
Andrei Pangin
86adc1605a Updated CHANGELOG 2026-04-15 16:24:06 +01:00
Andrei Pangin
804df3ac8e #1203: Fix "Instance field not found" when using -Xcheck:jni on JDK 8 2026-04-15 16:22:31 +01:00
Andrei Pangin
8aab346c3b #1727: Allocation profile has wrong units in OTLP format 2026-04-14 15:56:20 +01:00
Andrei Pangin
7bd911a007 Release 4.4 2026-04-13 22:33:42 +01:00
Andrei Pangin
2df2733d1d #1676: Make dwarf stack walking mode an alias for vm
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2026-04-13 21:51:25 +01:00
Andrei Pangin
4d5441f2cd Retry vDSO unwinding on AArch64 using the default frame (#1724) 2026-04-13 20:05:15 +01:00
Andrei Pangin
cc9e91bd8f Retry vDSO unwinding on AArch64 using the default frame (#1724)
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2026-04-13 15:53:56 +01:00
Andrei Pangin
e899de6a9c #1720: Dark mode toggle in HTML FlameGraph
Co-authored-by: Enrique Wood <ewoodg@hotmail.com>
2026-04-11 02:01:37 +01:00
Andrei Pangin
fbc3942095 Do not walk past virtual thread continuation barriers 2026-04-10 18:49:06 +01:00
Andrei Pangin
6afb9572c1 Use streq/startsWith instead of strcmp/strncmp 2026-04-10 17:25:11 +01:00
Hope Kim
f763e195ee Correct mmap failure check on macOS (#1713) 2026-04-08 22:45:37 +01:00
Bara' Hasheesh
f1b87ead07 Cleanup JVM detection for the test runner (#1717) 2026-04-01 15:48:08 +01:00
Andrei Pangin
4dda6c40af #1716: Wall-clock Heatmap does not count samples correctly 2026-04-01 01:17:50 +01:00
Andrei Pangin
264b8ab5da #1715: Fix Zing crash when profiling cpu+wall together 2026-03-31 00:19:39 +01:00
Andrei Pangin
c383a35ff4 Adjust limits for tests sensitive to CPU time 2026-03-27 14:38:10 +00:00
Diego Lovison
82ae80a660 doc: Improve readability of the jfrconv conversion table (#1711) 2026-03-26 13:19:44 +00:00
Andrei Pangin
7e92b5cdac Fix GHA test failures 2026-03-26 00:12:24 +00:00
Andrei Pangin
fe69e4fab2 An option to truncate deep stacks (#1706) 2026-03-25 23:51:55 +00:00
Andrei Pangin
d94581c24c Workaround for JFR shutdown race (#1707) 2026-03-25 22:51:24 +00:00
alevymyers
f3c31942fb Ensure remaining buffer is sufficient for event data in JfrReader (#1697) 2026-03-24 20:49:02 +00:00
Andrei Pangin
a246ced814 An option to limit size of the call trace storage (#1705) 2026-03-23 17:07:33 +00:00
Andrei Pangin
8d653dd5e0 Unify uses of gcc atomics (#1704) 2026-03-20 16:14:48 +00:00
Andrei Pangin
cc0eab1789 Speed-up stack walking by optimizing nmethod name comparison (#1701) 2026-03-18 18:05:58 +00:00
Andrei Pangin
842b612e08 Allow negative keys in JFR constant pool (#1699) 2026-03-17 15:01:00 +00:00
Andrei Pangin
ff4336d136 #1672: Flamegraph: use Ctrl+Click in addition to Alt+Click to remove stacks 2026-03-16 23:44:03 +00:00
Dan Lutker
e1dd4c05f6 Adding a multi-threaded test executor (#1688) 2026-03-10 22:40:54 +00:00
Dan Lutker
174dc31d88 Add workflow_dispatch for manual runs (#1693) 2026-02-26 18:07:10 +00:00
korniltsev-grafanista
dbd9fc7520 Fix parsing /proc/self/stat (#1690) 2026-02-20 02:10:55 +00:00
Andrei Pangin
dc69cf4b80 Unwind AArch64 generated stubs on JDK 26+ (#1684) 2026-02-11 16:03:40 +00:00
Andrei Pangin
abc8b7f493 #1686: Clarify table allocation logic in CallTraceStorage 2026-02-11 15:57:49 +00:00
Bara' Hasheesh
4ea8e5bbb6 Remove duplicate CodeCache lookup operation in walkVM (#1682) 2026-02-10 16:26:27 +00:00
Bara' Hasheesh
71ad47a46e More AGCT cleanup after removal of recovery tricks (#1683) 2026-02-09 13:46:57 +00:00
Andrei Pangin
0023021ddf #1675: Cleanup AGCT recovery tricks and remove safemode option 2026-02-08 01:38:34 +00:00
Andrei Pangin
444d0e6353 Suppress gcc warning 'parameter passing for argument changed' 2026-02-07 18:23:06 +00:00
Andrei Pangin
7e2ed0e77e #1677: Remove cstack=lbr option 2026-02-07 16:52:47 +00:00
Andrei Pangin
68244fbf6f #1678: Eliminate redundant listing of profiler arguments 2026-02-06 23:20:30 +00:00
Bara' Hasheesh
31042f13bc An option to select TLAB based AllocTracer engine with JDK 11+ (#1671) 2026-02-05 10:56:06 +00:00
Andrei Pangin
a3c6d92d39 Permanently remove check command (#1673) 2026-02-05 10:47:59 +00:00
Andrei Pangin
036c87e50d Differential Flame Graphs (#1553) 2026-02-05 00:00:48 +00:00
Andrei Pangin
15b1161f57 Move converter Main class to the one.convert package (#1670) 2026-02-04 01:13:21 +00:00
Andrei Pangin
b02434bd9d #1268: Documentation should refer to libasyncProfiler.dylib on macOS 2026-02-01 01:07:26 +00:00
Johannes Bechberger
9c293283f2 Mention DebugNonSafepoints flag in "Getting Started" (#1270)
Signed-off-by: Johannes Bechberger <johannes.bechberger@sap.com>
Co-authored-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2026-01-31 21:09:25 +00:00
Andrei Pangin
3aba5ee521 #1668: Document --include/--exclude as non-JFR options 2026-01-31 19:01:48 +00:00
Andrei Pangin
078935591f Removed extra spaces 2026-01-23 18:16:16 +00:00
Vishal Chand
dc88d3f756 #1585: Scale perf counter in case of multiplexing (#1631) 2026-01-23 18:13:03 +00:00
Andrei Pangin
a071e8a2f8 Specify JAR manifest in Maven build 2026-01-23 01:21:31 +00:00
Bara' Hasheesh
6e6acc1769 Fix recordCpuMultiEngine test failure when kernel profiling is restricted (#1664) 2026-01-21 16:28:16 +00:00
Andrei Pangin
709a777393 Updated links to v4.3 2026-01-21 00:00:58 +00:00
Francesco Andreuzzi
b9d6843ae5 Provide non-aggregated samples in OTLP converter (#1660) 2026-01-20 00:28:09 +00:00
Long Yang
3722d05ba0 #1528: Add a hard-coded limit on the maximum number of jmethodIDs (#1656) 2026-01-19 12:46:30 +00:00
Francesco Andreuzzi
872be63220 Fix VMStructs::initJvmFunctions call order (#1658) 2026-01-15 10:58:30 +00:00
Bara' Hasheesh
a89d7ddeba Prefer perf-events engine when record-cpu or target-cpu are selected (#1654) 2026-01-15 10:29:04 +00:00
Andrei Pangin
f789c4f748 CI: Fix job dependencies 2026-01-14 02:37:42 +00:00
Andrei Pangin
d43d328b58 Fix trailing spaces 2026-01-14 02:23:19 +00:00
Andrei Pangin
037c09906d Updated CHANGELOG 2026-01-14 02:14:27 +00:00
Andrei Pangin
f352361814 Swapped toolbar icon colors 2026-01-13 19:53:19 +00:00
Vishesh Ruparelia
19b22efeff Support include/exclude flag for JFR to pprof conversion (#1655) 2026-01-13 19:21:56 +00:00
Andrei Pangin
16fdebf78c #1140: Flamegraph hot keys should not trigger default browser action
Co-authored-by: Kerem Kat <keremkat@gmail.com>
2026-01-13 18:12:22 +00:00
Andrei Pangin
0b73f655a8 #1140: Flamegraph improvements: legend, hot keys, new toolbar icons
Co-authored-by: Kerem Kat <keremkat@gmail.com>
2026-01-13 18:01:59 +00:00
Andrei Pangin
cf4739a61b #1653: Disable currentTimeMillis test on JDK 11 2026-01-12 19:54:48 +00:00
Bara' Hasheesh
757bf8edd3 Disallow incorrect usage of jfrsync (#1647) 2026-01-12 19:10:21 +00:00
Vishesh Ruparelia
d0d16240d4 Support include/exclude flag for JFR to OTLP conversion (#1635) 2026-01-12 19:10:05 +00:00
Bara' Hasheesh
fc9b5c85cf Do not record cpu frame on non-perf samples (#1651) 2026-01-12 13:47:33 +00:00
Francesco Andreuzzi
436d5b5066 Fix stop sequence in Profiler::start (#1648) 2026-01-10 04:21:18 +03:00
Vishesh Ruparelia
4663784b98 Support include/exclude flag for JFR to heatmap conversion (#1633) 2026-01-08 16:51:09 +03:00
Francesco Andreuzzi
d2172a6382 Add missing stub for com.google.protobuf.ProtocolStringList (#1644) 2026-01-08 15:25:47 +03:00
Francesco Andreuzzi
93b6ae376d Address breaking changes in OTLP 1.9.0 (#1624) 2026-01-08 03:03:24 +03:00
Andrei Pangin
ee4ac6e888 Unwind vDSO correctly on Linux-ARM64 (#1641) 2026-01-08 02:19:52 +03:00
Francesco Andreuzzi
865e8b91f8 Add timeSpan to WallClockSample (#1640) 2026-01-08 01:55:47 +03:00
Bara' Hasheesh
2a4f329cba Fix issue where the jfrconv uses native lock samples for leak detection (#1630) 2025-12-23 12:24:33 +00:00
Andrei Pangin
9c425ca74f Harden crash protection in StackWalker (#1629) 2025-12-21 23:17:38 +00:00
Andrei Pangin
d871819848 Stylistic changes after #1628 2025-12-20 20:59:10 +00:00
Andrew Azores
bf84fadb3c Implement -javaagent premain/agentmain, register MXBean (#1628) 2025-12-20 20:36:06 +00:00
Bara' Hasheesh
fde780e275 Fix timeout & loop combined usage (#1625) 2025-12-19 11:11:34 +00:00
Francesco Andreuzzi
6e04336375 Filter JFR events by latency (#1620) 2025-12-16 20:10:06 +00:00
Bara' Hasheesh
a77d091e08 asprof stop fails when called after VM shutdown (#1623) 2025-12-16 10:46:56 +00:00
Bara' Hasheesh
999f0c7ae3 Prevent profiler from deadlocking the JVM death when using jfrsync (#1619) 2025-12-12 12:13:36 +00:00
Bara' Hasheesh
cdaf6e76ba Create a GraalVM filter for async-profiler tests (#1611) 2025-12-10 22:45:57 +00:00
Francesco Andreuzzi
3a493bedc4 Fix code cache memory leak in lock profiling while looping (#1575) 2025-12-08 17:36:06 +00:00
Bara' Hasheesh
7b24ad89b6 Sync jattach sources - avoid busy waiting for dead process (#1615) 2025-12-05 21:15:22 +00:00
Bara' Hasheesh
5bf0e311c2 asprof collect fails when the target process concurrently terminates (#1614) 2025-12-05 20:51:45 +00:00
Andrei Pangin
8772214f7e Updated links to v4.2.1 2025-11-22 03:14:22 +00:00
Jaromir Hamala
3bb1e72d09 Timezone switcher between Local and UTC time in heatmaps (#1530) 2025-11-20 01:01:24 +00:00
Kerem Kat
ea0b34b578 Use ref-cycles instead of cycles in PmuTests to fix flakiness (#1601) 2025-11-19 16:57:43 +00:00
Bara' Hasheesh
e92eb45812 Support running integration tests on a different JDK (#1602) 2025-11-19 16:36:15 +00:00
Bara' Hasheesh
d304fd5d75 Do not fail cacheMisses test if no samples are collected (#1600) 2025-11-19 13:07:23 +00:00
Andrei Pangin
61a676f87f Added missing docs on nativelock and trace options 2025-11-18 23:58:25 +00:00
Andrei Pangin
b855e0c2c4 Follow up: Workaround for the PERF_EVENT_IOC_REFRESH bug (#1599)
Signed-off-by: Andrei Pangin <1749416+apangin@users.noreply.github.com>
2025-11-18 17:26:44 +00:00
Andrei Pangin
763616aa17 Workaround for PERF_EVENT_IOC_REFRESH bug (#1599) 2025-11-18 15:33:16 +00:00
Andrei Pangin
a25e5194bf Do not block any signals during execution of a custom crash handler (#1596) 2025-11-17 10:06:56 +00:00
Andrei Pangin
ff24f1220c Do not set [bug] label automatically 2025-11-14 22:05:01 +00:00
Kerem Kat
124eca439e Fix build with MERGE=false (#1594) 2025-11-14 20:34:16 +00:00
Bara' Hasheesh
4bcfe9ee7b Re-enable ComptaskTests for JDK25 (#1592) 2025-11-14 20:29:56 +00:00
Bara' Hasheesh
0b7ee6d830 Support compilation on modern JDKs. Drop JDK7 support (#1590) 2025-11-14 15:53:37 +00:00
Andrei Pangin
01325ea87c #1584: JfrReader loops on corrupted recordings 2025-11-07 21:36:38 +00:00
Bara' Hasheesh
78172b7cb0 Optimize make targets to not compile C/C++ files when not needed (#1581) 2025-11-06 13:10:00 +00:00
Soumadipta Roy
fd269e6450 Remove more redundant check declarations (#1579) 2025-11-05 13:04:15 +00:00
Soumadipta Roy
61d48a6b43 Remove redundant check declarations (#1577) 2025-11-05 11:56:59 +00:00
Soumadipta Roy
c6c2fc1497 Deprecate check command (#1574) 2025-11-04 23:12:16 +00:00
Bara' Hasheesh
585054661f [test] Change stub frame check to be more generic (#1576) 2025-11-03 17:49:12 +00:00
Soumadipta Roy
0cb40bee11 Reduce mutex and rdlock iterations for nativelock tests (#1571) 2025-10-30 14:44:38 +00:00
Francesco Andreuzzi
8c851ddad2 [GHA] Fix trailing whitespaces checker (#1572) 2025-10-30 12:03:25 +00:00
Francesco Andreuzzi
8fa4fd0b78 Expose async-profiler metrics (#1568) 2025-10-30 00:01:37 +00:00
Francesco Andreuzzi
9611d55567 [GHA] Add checker for trailing whitespace (#1569) 2025-10-29 11:17:05 +00:00
Francesco Andreuzzi
eb4d126a2d Force test runs to use correct async-profiler library (#1565) 2025-10-28 18:24:28 +00:00
Soumadipta Roy
85ae06b177 #1547: Implement native lock profiling (#1549) 2025-10-23 19:24:55 +01:00
Francesco Andreuzzi
872631f82b [GHA] Add more patterns to the EOF newline checker (#1561) 2025-10-23 15:29:19 +01:00
Francesco Andreuzzi
7482988021 [GHA] Add checker for EOF newline (#1560) 2025-10-23 13:39:36 +01:00
Andrei Pangin
e647076de5 Broken formatting in ProfilerOptions doc 2025-10-22 20:37:59 +01:00
Bara' Hasheesh
c478490ce9 Fix record-cpu bug when kernel stacks are not available (#1558) 2025-10-22 18:47:56 +01:00
Bara' Hasheesh
6e10742be1 Fix duplicate native stacks when -F mixed is used with fp/dwarf stack walker (#1546) 2025-10-22 00:59:58 +01:00
Adnan Khan
49e56704f9 ci: scope down GitHub Token permissions (#1556) 2025-10-22 00:46:57 +01:00
Andrei Pangin
79d9058b18 Parse FlameGraph title from HTML input (#1555) 2025-10-21 14:37:50 +01:00
Andrei Pangin
9674d20873 Converter API to parse jfr or collapsed to a FlameGraph object (#1551) 2025-10-21 14:27:23 +01:00
Andrei Pangin
538f3a2e48 Update links to v4.2 in the documentation (#1548) 2025-10-20 16:10:08 +01:00
Andrei Pangin
e35113a647 Added test for the native frame duplication issue (#1545) 2025-10-17 22:44:59 +01:00
Andrei Pangin
8f7e4e19cc Fix duplicated native stacks with perf_events (#1544) 2025-10-17 16:20:10 +01:00
Bara' Hasheesh
88b7ba3838 Add missing nativemem jfr setting (#1542) 2025-10-17 12:00:59 +01:00
Andrei Pangin
7a86354d77 Release 4.2 2025-10-15 22:11:49 +01:00
Andrei Pangin
fa5ada6747 Use VMStructs stack walking mode by default (#1539) 2025-10-15 22:00:25 +01:00
Francesco Andreuzzi
fc2a9b928c Latency Profiling enhancements (#1499) 2025-10-15 20:28:13 +01:00
Andrei Pangin
5aee9cdb03 Support advanced stack walking features with cstack=vm (#1537) 2025-10-15 20:25:19 +01:00
Francesco Andreuzzi
dd0d233499 Skip Instrument::stop in VMDeath callback (#1538) 2025-10-15 19:55:18 +01:00
Bara' Hasheesh
fb673227c7 GHA: replace macos-13 with macos-15-intel (#1527) 2025-10-09 02:47:08 +02:00
Lukas Bloder
1a15a0e86a Expose dictionary that maps event id to javaThreadId (#1526) 2025-10-07 10:47:11 +01:00
Andrei Pangin
ea095462ca Use JavaFrameAnchor to find top Java frame with cstack=vm (#1517) 2025-10-03 14:17:40 +01:00
Soumadipta Roy
3634cdc1ac Fix retryCount final issue (#1522) 2025-10-02 17:54:07 +01:00
Soumadipta Roy
3e663759da #1510: Add option to retry tests with parameterized retry count (#1520) 2025-10-02 17:41:06 +01:00
Bara' Hasheesh
97c35ac96c Fix matching pattern for sys_getdents (#1521) 2025-10-02 17:39:37 +01:00
Bara' Hasheesh
6453ccca43 Fix UnsatisfiedLinkError when tmpdir is set to a relative path (#1515) 2025-10-01 15:24:01 +01:00
Kerem Kat
f9b78102ce Add CPUTimeSample event support to jfrconv (#1475) 2025-09-30 12:56:30 +01:00
Kerem Kat
861f4f4f63 Add Liberica to the CI on Alpaquita musl (#1466) 2025-09-30 12:12:41 +01:00
Bara' Hasheesh
0eba17edd0 Add more GHA jobs to cover JDK versions on ARM (#1508) 2025-09-29 14:59:57 +01:00
Kerem Kat
bfa821b6ce Make workflow names shorter (#1514) 2025-09-26 23:36:50 +01:00
Kerem Kat
e3f646a1d9 Fix integration test dependencies to build and cosmetic changes (#1502) 2025-09-26 11:49:10 +01:00
Francesco Andreuzzi
7338c30d88 Clean up EventMask usages (#1507) 2025-09-26 11:36:15 +01:00
Andrei Pangin
d97a7d3343 Detect if calloc calls malloc (#1500)
Fixes `nativemem` profiling on Alpaquita Linux
2025-09-23 13:27:38 +01:00
Bara' Hasheesh
07b3e747d1 Eliminate period bias in CPU smoke test (#1465) 2025-09-17 18:30:56 +01:00
Andrei Pangin
70a13bcd03 asprof --latency option 2025-09-16 01:08:00 +01:00
Francesco Andreuzzi
6f2a9b80f8 Optimize method tracing when the function is not profiled (#1471) 2025-09-15 21:10:52 +01:00
Bara' Hasheesh
145fc2dd28 Fix processSamplingWithMemoryThreshold possible failure (#1494) 2025-09-12 16:26:08 +01:00
Bara' Hasheesh
6fc51db16e Fix comptask test failure (#1474) 2025-09-11 15:35:33 +01:00
Andrei Pangin
49ae9cfe7f Differentiate Java and non-Java threads using vtable (#1470) 2025-09-10 23:30:11 +01:00
Kerem Kat
fcf2734f56 Reconvert outdated flamegraph.html (#1472) 2025-09-10 19:03:05 +01:00
Andrei Pangin
2c188fe490 Fix compilation with JDK 8 headers 2025-09-09 12:51:49 +01:00
Rohitash Kumar
f6e850c5f5 Fix GetProcessIds_returns_valid_pids test (#1468) 2025-09-09 11:44:11 +01:00
Andrei Pangin
62307a2418 Fix compiler warning about sscanf 2025-09-07 20:46:29 +01:00
Rohitash Kumar
b30f5f1da1 System wide process sampling on Linux (#1411) 2025-09-06 15:11:17 +01:00
Bara' Hasheesh
af8fabe3db Unwind top frame on ARM using link register (#1463) 2025-09-06 03:08:59 +01:00
Francesco Andreuzzi
2e0e3ab792 Java method tracing + Latency profiling (#1421) 2025-09-06 01:06:09 +01:00
Francesco Andreuzzi
9f687fb07e Add -Wunused-variable compilation flag (#1462) 2025-09-03 21:09:23 +01:00
Francesco Andreuzzi
e052d51323 Profile:: _symbol_map is unused (#1456) 2025-08-29 22:28:30 +03:00
Francesco Andreuzzi
6ebadb87cf Thread name in OpenTelemetry output (#1448) 2025-08-29 00:12:11 +03:00
Andrei Pangin
5454c9bf7f Special handling of prologue and epilogue of compiled methods with cstack=vm (#1449) 2025-08-28 11:43:20 +03:00
Bara' Hasheesh
1eb40f446c [test] Compare ContendedLock duration against total thread blocked time (#1443) 2025-08-28 11:42:51 +03:00
Bara' Hasheesh
df063a6f6e Forbid setting begin and end to the same address (#1445) 2025-08-28 11:34:24 +03:00
Bara' Hasheesh
d651a7a326 Unwind dylib stubs as empty frames on macOS (#1450) 2025-08-27 02:52:11 +03:00
Andrei Pangin
512f7b88cf Allow cstack=vm for 32-bit ports 2025-08-24 01:15:13 +03:00
Adina-Andreea Zugravescu
ead97aca72 Fix sized string writer method in protobuf writer (#1447) 2025-08-21 19:14:21 +03:00
Andrei Pangin
6c61fb6c83 Remove harmful -momit-leaf-frame-pointer gcc flag (#1438) 2025-08-18 12:04:06 +01:00
Francesco Andreuzzi
02a8fdb9f5 Allow wildcards in Instrument profiling engine (#1435) 2025-08-14 14:47:26 +01:00
Bara' Hasheesh
0e551b0fef Two wall-clock profilers interfere with each other (#1417) 2025-08-13 15:37:03 +01:00
Francesco Andreuzzi
9b5e3f330a Smoke tests for JFR converter (#1434) 2025-08-13 13:55:00 +01:00
Francesco Andreuzzi
89ead820f0 Per-thread flamegraph option in JFR heatmap converter (#1414) 2025-08-13 13:42:38 +01:00
Francesco Andreuzzi
b320af7ad3 Preserve compatibility with old JFR reader (#1431) 2025-08-07 14:13:38 +01:00
Andrei Pangin
a9e8c8d558 Re-implement SafeAccess (#1427) 2025-08-06 20:07:03 +01:00
Andrei Pangin
ca58e81005 Fixed gcc warning on AArch64 2025-08-06 01:59:58 +01:00
Andrei Pangin
a035e3e4d1 Allow cross-compilation for 32-bit platforms 2025-08-06 01:26:32 +01:00
Bara' Hasheesh
f62a53ed3d More robust checks in parseMultiModeRecording test (#1428) 2025-08-04 18:49:24 +01:00
Bara' Hasheesh
8e64342485 Fix randomly failing tests (#1392) 2025-08-01 16:45:42 +01:00
Francesco Andreuzzi
b6d442b542 Skip hidden directories while looking for tests (#1426) 2025-07-31 15:32:26 +01:00
Francesco Andreuzzi
3237a0ce9b Extract JFR symbol lookup (#1378) 2025-07-31 00:46:54 +01:00
Francesco Andreuzzi
c612cd70e9 [GHA] Disable matrix fail-fast (#1296) 2025-07-30 21:19:35 +01:00
Francesco Andreuzzi
f461a06d23 Proactively check classpath in tests spawning a child process (#1422) 2025-07-30 12:07:46 +01:00
Adina-Andreea Zugravescu
8593be1600 Add time_nanos and duration_nanos to OTLP profiles with test (#1413) 2025-07-30 01:56:23 +01:00
Bara' Hasheesh
88c46da067 Refactor RegularPeak test to use javaagent (#1404) 2025-07-29 12:12:32 +01:00
Francesco Andreuzzi
fc6ffb3d6e Remove some clang-tidy checks (#1419) 2025-07-28 16:29:59 +01:00
Bara' Hasheesh
fd157a8a42 Add synthetic symbols for Mach-O stubs/trampolines (#1416) 2025-07-28 13:47:44 +01:00
Andrei Pangin
2a0bae6e06 #1395: VMStructs unwinding fails on JDK 26 2025-07-28 01:07:20 +01:00
Francesco Andreuzzi
49b7320521 Simplify heatmap JFR converter (#1388) 2025-07-25 13:49:34 +01:00
Andrei Pangin
cdedd3fb22 Change Maven Publisher from OSSRH to Central Portal 2025-07-21 22:18:20 +01:00
Andrei Pangin
da812fca7a Update links to v4.1 in the documentation 2025-07-21 14:30:12 +01:00
Andrei Pangin
5930966a92 Release 4.1 2025-07-21 02:35:48 +01:00
Andrei Pangin
7737df342d Updated CHANGELOG 2025-07-21 02:30:07 +01:00
Andrei Pangin
843f1d9f3e Unwind checksum and digest intrinsics on ARM64 (#1400) 2025-07-21 02:28:54 +01:00
Andrei Pangin
733f2a513c Rolled back invalid fragment from previous commit 2025-07-18 20:16:06 +01:00
Andrei Pangin
9824786981 #1389: Incorrect top frame for synchronous events with cstack=vm on ARM64 (#1399) 2025-07-18 16:30:13 +01:00
Soumadipta Roy
5fffdb1eaa Rewrite jfrconv executable to shell (#1366) 2025-07-17 15:59:20 +01:00
Francesco Andreuzzi
7bf8528f75 Separate workflow for automated clang-tidy review (#1384) 2025-07-16 21:45:52 +01:00
Vishesh Ruparelia
80ae8aed19 Improve stack walking termination logic (#1393) 2025-07-15 15:44:13 +01:00
Bara' Hasheesh
1c1a14c1ec Fix intermittent failures of JfrTests with live option (#1376) 2025-07-15 15:36:20 +01:00
Andrei Pangin
83e9bdd9bd Typo in docs 2025-07-14 18:18:26 +01:00
Bara' Hasheesh
22ce08f5ef #1380: Workaround clang type promotion bug (#1390) 2025-07-14 14:22:53 +01:00
Bara' Hasheesh
7c4385b0b1 JFR writer crashes when using cstack=vmx (#1387) 2025-07-11 13:11:09 +01:00
Bara' Hasheesh
461a3c1b93 Correctly check if profiler is preloaded (#1374) 2025-07-10 18:26:45 +01:00
Francesco Andreuzzi
5b178bfc5c Temporarily disable clang-tidy automatic comments (#1382) 2025-07-10 14:19:56 +01:00
Francesco Andreuzzi
520b897dce Create test/deps if it does not exist before running Makefile recipes (#1375) 2025-07-09 19:58:41 +01:00
Bara' Hasheesh
a70f25e00f Save all generated logs for debug purposes (#1373) 2025-07-09 18:44:37 +01:00
Francesco Andreuzzi
f79729167a Test OTLP output format (#1331) 2025-07-09 13:33:39 +01:00
Bara' Hasheesh
f627b3157b Give tests unique suffix names (#1371) 2025-07-08 17:35:53 +01:00
Francesco Andreuzzi
85fefd2800 Publish clang-tidy comments only for non-draft PRs (#1367) 2025-07-07 19:11:45 +01:00
Francesco Andreuzzi
5091304efd Ensure that only files under src/ are checked in cpp-lint-diff (#1365) 2025-07-07 14:12:13 +01:00
Francesco Andreuzzi
c42bf7ad9d Cancel redundant in-progress GHA runs (#1363) 2025-07-07 11:50:25 +01:00
Francesco Andreuzzi
2b8dffff27 JFR to OTLP converter (#1336) 2025-07-04 19:43:22 +01:00
Francesco Andreuzzi
09ad6c1663 Auto-generated clang-tidy review comments (#1360) 2025-07-04 14:51:48 +01:00
Andrei Pangin
40fd71a8a0 #1358: Do not dereference jmethodIDs on JDK 26 (#1362) 2025-07-04 14:03:10 +01:00
Andrei Pangin
557f4adecb Fix nonjava test failure on Alpine 2025-07-03 20:13:37 +01:00
Andrei Pangin
de54c536dc Do not include excess files in test.jar 2025-07-03 19:36:36 +01:00
Andrei Pangin
c74107e53f Suppress javac warnings when compiling tests 2025-07-03 19:08:00 +01:00
Francesco Andreuzzi
b3968f5e38 Simplify location handling in OTLP (#1361) 2025-07-02 20:45:14 +01:00
Bara' Hasheesh
29dd537907 Correctly unwind stack for malloc events in VM stack walking mode (#1357) 2025-07-01 19:41:54 +01:00
Bara' Hasheesh
0330a6e333 Allow cstack=vmx for native applications (#1354) 2025-07-01 15:29:22 +01:00
Francesco Andreuzzi
9b44c2e99d C++ linting via clang-tidy (#1338) 2025-07-01 12:57:45 +01:00
Kerem Kat
5b4450b85c Fix invalid alignment in mallocTracer and zero-init buf in getTotalCpuTime (#1351) 2025-06-26 16:57:26 +01:00
Andrei Pangin
82d13772a5 Disable JFR OldObjectSample event in jfrsync mode (#1350) 2025-06-25 23:03:03 +01:00
Andrei Pangin
bbca9f1817 [test] Avoid listing files in /tmp 2025-06-18 12:20:48 +01:00
Bara' Hasheesh
981619680e Change stackwalker test checks to be more restrictive (#1341) 2025-06-13 17:12:42 +01:00
Francesco Andreuzzi
2b556680dc Use Index in jfrMetadata (#1337) 2025-06-13 12:47:00 +01:00
Francesco Andreuzzi
b3f58429f5 Support for OTLP Profile signals (#1188) 2025-06-12 01:11:46 +01:00
Andrei Pangin
2844e6c5c1 #1327: Merged jattach memory leak fixes
Co-authored-by: tteokbokki-master <0jin.git@gmail.com>
2025-06-09 14:34:06 +01:00
Andrei Pangin
0e1008531b Fixed misc compilation and test failures 2025-06-05 19:57:35 +01:00
Bara' Hasheesh
19ad42cd23 Enable native memory profiling of async-profiler itself (#1323) 2025-06-03 16:31:02 +01:00
Bara' Hasheesh
f76833a2c0 Add integration test for VM/VMX stack walkers for incomplete frame edge cases (#1321) 2025-06-02 14:04:17 +01:00
Andrei Pangin
4b1df29aab #1319: Accessing osThreadId of a terminating thread may fail (#1324) 2025-06-02 01:43:51 +01:00
Bara' Hasheesh
795da942f7 Enable unit tests related to symbol parsing on macOS (#1315) 2025-06-01 18:18:42 +01:00
Bara' Hasheesh
bedffcb080 Updated tests to verify symbol patching on macOS (#1279) 2025-05-25 13:19:36 +01:00
Andrei Pangin
660ffcd5c6 #1193: Parse non-lazy symbol pointers on macOS
Co-authored-by: Bara' Hasheesh <bara.hasheesh@gmail.com>
2025-05-25 12:54:26 +01:00
Bara' Hasheesh
60e79e364a Prevent from exceeding MAX_NATIVE_LIBS limit (#1312) 2025-05-23 15:41:00 +01:00
Andrei Pangin
d89ab7a16c Skip last 10% allocations for leak detection (#1299) 2025-05-21 13:28:48 +01:00
Francesco Andreuzzi
d042e0a8db Fix comptask test flakyness on JDK8 (#1307) 2025-05-21 10:34:24 +01:00
Andrei Pangin
3256fde4c1 Do not count tests that are not in the include list 2025-05-21 01:15:44 +01:00
Francesco Andreuzzi
3bbab49e3c Add corretto-8 to test matrix (#1274) 2025-05-20 15:38:37 +01:00
Francesco Andreuzzi
ed57317281 Test comptask feature (#1293) 2025-05-20 15:34:03 +01:00
Francesco Andreuzzi
c17de4c220 Guard hook installation with dlopen/dlclose (#1264) 2025-05-20 02:39:43 +01:00
Andrei Pangin
3a9252c677 Allow profiling kprobes/uprobes with --fdtransfer (#1300) 2025-05-19 19:03:21 +01:00
Francesco Andreuzzi
fd8ba8b9ee Fix typo in JfrTests (#1303) 2025-05-19 19:02:45 +01:00
Andrei Pangin
5fe1c47ee3 Removed unused imports 2025-05-18 01:36:21 +01:00
Andrei Pangin
ff203f391a Make sure siginfo is passed to custom SEGV handler (#1298) 2025-05-18 00:41:32 +01:00
Francesco Andreuzzi
ba93f813a9 Protobuf writer (#1292) 2025-05-15 16:12:30 +01:00
JugadK
7c3aa59ceb Record which CPU a sample was taken on (#1286) 2025-05-13 15:30:29 +01:00
Vishesh Ruparelia
25ddfe056b Support "--all" profiling mode (#1281) 2025-05-13 13:41:24 +01:00
Andrei Pangin
7d4157b1c5 Moved and updated policy files 2025-05-07 03:16:58 +01:00
Kerem Kat
b3907b43ed Include debug symbols in the release for the lib (#1271) 2025-05-06 22:24:42 +01:00
Kerem Kat
cf39c3ad9c Remove flaky RaceToLocks test (#1282) 2025-05-06 21:33:13 +01:00
Francesco Andreuzzi
73f0486946 Redirect only stderr from taskset in integration tests (#1276) 2025-05-01 20:01:11 +01:00
Francesco Andreuzzi
f2197dc400 Build and run tests on amazonlinux:2023 and amazonlinux:2 (#1246) 2025-05-01 12:53:45 +01:00
Bara' Hasheesh
8c15cbac99 Fix memory hook installation (#1269) 2025-04-29 17:18:31 +01:00
Francesco Andreuzzi
d2c85c18c6 Build and run tests on Alpine (#1226) 2025-04-28 19:49:49 +01:00
Andrei Pangin
b5b41dcaaa Fix perfEvents tests on systems with restricted perf_event_paranoid 2025-04-28 19:40:10 +01:00
Francesco Andreuzzi
f5fd5b0863 Fix NativememTests#dlopenCustomLib on Alpine (#1254) 2025-04-28 15:55:25 +01:00
Kerem Kat
dbcd94fcd6 Add test name filtering to the java test runner (#1129) 2025-04-28 13:49:39 +01:00
Bara' Hasheesh
570ead9c13 Include asprof.h in async-profiler release (#1262) 2025-04-25 21:52:11 +01:00
Francesco Andreuzzi
471267bea4 Fix nightly publication (#1267) 2025-04-25 16:51:59 +01:00
Francesco Andreuzzi
67076816c1 Separate jobs for build/unit tests and integration tests (#1253) 2025-04-25 16:19:43 +01:00
Francesco Andreuzzi
0c72a8d3e9 Amend amazonlinux:2 image (#1265) 2025-04-25 14:26:08 +01:00
Kerem Kat
fdaf1957c0 Detect and parse the loader in parseLibraries. (#1263) 2025-04-24 19:48:43 +01:00
Francesco Andreuzzi
047a6dea1f Add amazonlinux:2 Docker image with Node.js (#1255) 2025-04-24 17:10:31 +01:00
Francesco Andreuzzi
14c7e819b2 Preferably patch non-dotted symbols when more than one matches the prefix (#1251) 2025-04-22 17:22:36 +01:00
Kerem Kat
fa417c85c8 Add nativemem test with dlopen after profiler start. (#1243) 2025-04-19 01:47:16 +01:00
Francesco Andreuzzi
6d786b7401 Skip compiler optimizations for dummy malloc hooks (#1242) 2025-04-18 12:04:37 +01:00
Francesco Andreuzzi
387dee13b8 Fix musl-specific problems (#1235) 2025-04-17 13:54:31 +01:00
Ariel Ben-Yehuda
164eac4dbd add support for user JFR events (#1223) 2025-04-15 15:12:28 +01:00
Johannes Bechberger
7207fc8775 Replace flamegraph with html in ConverterUsage.md (#1231) 2025-04-15 14:30:14 +01:00
Bara' Hasheesh
b034e4c314 #1174: Detect JVM in non-Java application and attach to it (#1192)
Co-authored-by: Andrei Pangin <noreply@pangin.pro>
2025-04-14 02:16:12 +01:00
Andrei Pangin
39f43006a1 Removed obsolete Makefile target 2025-04-13 21:47:55 +01:00
Andrei Pangin
b9f3456f89 Support PAC on linux-arm64 2025-04-13 19:38:28 +01:00
Andrei Pangin
5ce2c34d3e Fixed unwinding of primordial stack on ARM64 2025-04-13 19:38:27 +01:00
Francesco Andreuzzi
e359d161ba Separate CI jobs for macOS arm64 and x64 (#1212) 2025-04-10 11:38:54 +01:00
Andrei Pangin
7b2d1d9c94 #1222: Update VMStructs for JDK 25 (mutable nmethod data out of CodeCache) 2025-04-10 02:32:47 +01:00
Andrei Pangin
5030fe5faa Minor documentation edits 2025-04-09 02:22:14 +01:00
Andrei Pangin
bc80518125 Update links to v4.0 in the documentation 2025-04-08 18:36:22 +01:00
271 changed files with 12932 additions and 3602 deletions

View File

@@ -3,24 +3,82 @@
<head>
<meta charset='utf-8'>
<style>
body {margin: 0; padding: 10px 10px 22px 10px; background-color: #ffffff}
:root {--bg: #ffffff; --fg: #000000; --hl-bg: #ffffe0; --hl-border: #ffc000; --link: #0366d6; --legend-bg: #ffffe0; --legend-border: #666666}
:root.dark {--bg: #1e1e1e; --fg: #cccccc; --hl-bg: #3a3a00; --hl-border: #8a7000; --link: #58a6ff; --legend-bg: #333333; --legend-border: #888888}
body {margin: 0; padding: 10px 10px 22px 10px; background-color: var(--bg); color: var(--fg)}
h1 {margin: 5px 0 0 0; font-size: 18px; font-weight: normal; text-align: center}
header {margin: -24px 0 5px 0; line-height: 24px}
button {font: 12px sans-serif; cursor: pointer}
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid #ffc000; display: none; overflow: hidden; white-space: nowrap; background-color: #ffffe0}
a {color: #0366d6}
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: #ffffe0; outline: 1px solid #ffc000; height: 15px}
header {margin: -22px 0 6px 0}
button {border: none; background: none; width: 24px; height: 24px; cursor: pointer; margin: 0; padding: 2px 0 0 0; text-align: center}
button:hover {background-color: var(--hl-bg); outline: 1px solid var(--hl-border); border-radius: 4px}
dl {margin: 0 4px 8px 4px}
dt {margin: 1px; padding: 2px 0; font-weight: bold}
dd {margin: 1px; padding: 2px 4px}
dl.frames {float: left; width: 160px}
dl.frames > dd {color: #000000}
dl.hotkeys {clear: left; border-top: 1px solid var(--legend-border)}
dl.hotkeys > dt {float: left; clear: left; width: 158px; margin-right: 4px; text-align: right}
dl.hotkeys > dd {float: left}
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid var(--hl-border); display: none; overflow: hidden; white-space: nowrap; background-color: var(--hl-bg); color: var(--fg)}
a {color: var(--link)}
#legend {padding: 4px; border-radius: 4px; background: var(--legend-bg); border: 1px solid var(--legend-border); display: none}
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: var(--hl-bg); outline: 1px solid var(--hl-border); height: 15px}
#hl span {padding: 0 3px 0 3px}
#status {left: 0}
#match {right: 0}
#reset {cursor: pointer}
#canvas {width: 100%; height: 576px}
</style>
<script>
{
    // Choose the initial theme: a value stored under 'flame-theme' takes priority;
    // when nothing is stored, follow the prefers-color-scheme media query.
    let saved;
    try {
        saved = localStorage.getItem('flame-theme');
    } catch (ignored) {
        // Reading localStorage threw; continue as if nothing was saved.
    }
    const dark = saved ? saved === 'dark' : matchMedia('(prefers-color-scheme: dark)').matches;
    if (dark) {
        document.documentElement.classList.add('dark');
    }
}
</script>
</head>
<body style='font: 12px Verdana, sans-serif'>
<h1>CPU profile</h1>
<header style='text-align: left'><button id='reverse' title='Reverse'>&#x1f53b;</button>&nbsp;&nbsp;<button id='search' title='Search'>&#x1f50d;</button></header>
<header style='text-align: right'>Produced by <a href='https://github.com/async-profiler/async-profiler'>async-profiler</a></header>
<header style='float: left'>
<button id='inverted' title='Invert (I)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 392 392'><path d='M196,36 L316,156 L76,156 Z' fill='#004d80'/><path d='M196,356 L76,236 L316,236 Z' fill='#004d80'/><path d='M196,54 L298,156 L94,156 Z' fill='#ff8d40'/><path d='M196,338 L94,236 L298,236 Z' fill='#40b2ff'/><rect x='94' y='188' width='204' height='16' fill='#004d80'/></svg></button>
<button id='search' title='Search (Ctrl+F)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='-39.3 -39.3 471.1 471.1'><circle cx='147.7' cy='147.8' r='125.9' fill='#fff'/><path fill='#40b2ff' d='M370.7 348.7c0 1.4-1.6 6.3-7.2 12.3-6.2 6.7-12.5 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6z'/><path fill='#ff8d40' d='M208.7 86.9l-14.5 14.5c-17.1 17.1-46.5 5-46.5-19.3V61.6c-49 0-88.4 40.8-86.1 90.2 2 43.9 38.1 80 82 82 49.5 2.3 90.2-37.2 90.2-86.1 0-23.7-9.6-45.2-25.1-60.8z'/><path fill='#004d80' d='M276.1 221c12.3-21.5 19.5-46.5 19.5-73.2C295.6 66.3 229.2.1 147.7.1S0 66.3 0 147.9s66.3 147.7 147.7 147.7c26.6 0 51.5-7.1 73.2-19.5 39.8 53.3 91.9 113.5 126.1 116.4 12.3.5 22.9-6.7 32.8-16.7 5.2-5.6 13.8-16.9 12.8-28.8-2.9-34.1-63.1-86.2-116.4-126.1zM147.7 273.8c-69.5 0-125.9-56.5-125.9-125.9S78.3 21.9 147.7 21.9 273.6 78.4 273.6 147.8s-56.4 126-125.9 126zm215.9 87.2c-6.2 6.7-12.4 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6 0 1.4-1.6 6.3-7.2 12.4z'/></svg></button>
<button id='darkmode' title='Toggle dark mode (D)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><path d='M10 4a6 6 0 0 1 0 12z' fill='#ff8d40'/><path d='M10 4a6 6 0 0 0 0 12z' fill='#ffffff'/><circle cx='10' cy='10' r='8' fill='none' stroke='#004d80'/></svg></button>
<button id='info'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><circle cx='10' cy='10' r='8' stroke='#004d80' fill='none'/><path d='M10 5.5c-1.25 0-2.25 1-2.25 2.25H9a1.25 1.25 0 0 1 2.5 0c0 .65-.55 1-1 1.2-.7.35-1.25.85-1.25 1.8V11h1.5v-.25c0-.37.29-.65.68-.83.73-.34 1.32-.87 1.32-2.17 0-1.25-1.5-2.25-2.75-2.25' fill='#ff8d40' stroke='#ff8d40' stroke-width='.6' stroke-linecap='round' stroke-linejoin='round'/><circle cx='10' cy='13.5' r='1.2' fill='#ff8d40'/></svg></button>
</header>
<header style='float: right'>Produced by <a href='https://github.com/async-profiler/async-profiler'>async-profiler</a></header>
<div id='legend' style='position: absolute'>
<dl class='frames'>
<dt>Frame types</dt>
<dd style='background-color: #e17d00'>Kernel</dd>
<dd style='background-color: #e15a5a'>Native</dd>
<dd style='background-color: #c8c83c'>C++ (VM)</dd>
<dd style='background-color: #50e150'>Java compiled</dd>
<dd style='background-color: #cce880'>Java compiled by C1</dd>
<dd style='background-color: #50cccc'>Inlined</dd>
<dd style='background-color: #b2e1b2'>Interpreted</dd>
</dl>
<dl class='frames'>
<dt>Allocation profile</dt>
<dd style='background-color: #50cccc'>Allocated class</dd>
<dd style='background-color: #e17d00'>Allocation outside TLAB</dd>
<dt>Lock profile</dt>
<dd style='background-color: #50cccc'>Lock class</dd>
<dt>&nbsp;</dt>
<dt>Search</dt>
<dd style='background-color: #ee00ee'>Matches regexp</dd>
</dl>
<!-- Hotkey reference shown in the legend popup; keep in sync with window.onkeydown and the button titles. -->
<dl class='hotkeys'>
<dt>Click frame</dt><dd>Zoom into frame</dd>
<dt>Ctrl/Alt+Click</dt><dd>Remove stack</dd>
<dt>0</dt><dd>Reset zoom</dd>
<dt>I</dt><dd>Invert graph</dd>
<dt>D</dt><dd>Toggle dark mode</dd>
<dt>Ctrl+F</dt><dd>Search</dd>
<dt>N</dt><dd>Next match</dd>
<dt>Shift+N</dt><dd>Previous match</dd>
<dt>Esc</dt><dd>Cancel search</dd>
</dl>
</div>
<canvas id='canvas'></canvas>
<div id='hl'><span></span></div>
<p id='status'></p>
@@ -30,9 +88,11 @@
// SPDX-License-Identifier: Apache-2.0
'use strict';
let root, px, pattern;
let level0 = 0, left0 = 0, width0 = 0;
let level0 = 0, left0 = 0, width0 = 0, d = 0;
let nav = [], navIndex, matchval;
let reverse = false;
let inverted = false;
const U = undefined;
const maxdiff = -1;
const levels = Array(36);
for (let h = 0; h < levels.length; h++) {
levels[h] = [];
@@ -66,10 +126,18 @@
return '#' + (p[0] + ((p[1] * v) << 16 | (p[2] * v) << 8 | (p[3] * v))).toString(16);
}
// Returns the fill colour for a frame in a differential flame graph.
//   diff === U (undefined) -> '#ffdd33'
//   diff === 0             -> '#e0e0e0'
//   diff > 0               -> rgb(255,v,v)
//   otherwise              -> rgb(v,v,255)
// where v shrinks as |diff| approaches maxdiff.
function getDiffColor(diff) {
    if (diff === U) {
        return '#ffdd33';
    }
    if (diff === 0) {
        return '#e0e0e0';
    }
    const scaled = 128 * (maxdiff - Math.abs(diff)) / maxdiff;
    const v = Math.round(scaled) + 96;
    const channels = diff > 0 ? [255, v, v] : [v, v, 255];
    return 'rgb(' + channels.join(',') + ')';
}
function f(key, level, left, width, inln, c1, int) {
levels[level0 = level].push({level, left: left0 += left, width: width0 = width || width0,
color: getColor(palette[key & 7]), title: cpool[key >>> 3],
details: (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
color: maxdiff >= 0 ? getDiffColor(d) : getColor(palette[key & 7]),
title: cpool[key >>> 3],
details: (d ? (d > 0 ? ', +' : ', ') + d : '') + (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
});
}
@@ -141,8 +209,10 @@
}
function render(newRoot, nav) {
const bg = getComputedStyle(document.documentElement).getPropertyValue('--bg');
if (root) {
c.fillStyle = '#ffffff';
c.fillStyle = bg;
c.fillRect(0, 0, canvasWidth, canvasHeight);
}
@@ -184,14 +254,14 @@
}
if (f.level < root.level) {
c.fillStyle = 'rgba(255, 255, 255, 0.5)';
c.fillStyle = bg + '80';
c.fillRect((f.left - x0) * px, y, f.width * px, 15);
}
}
}
for (let h = 0; h < levels.length; h++) {
const y = reverse ? h * 16 : canvasHeight - (h + 1) * 16;
const y = inverted ? h * 16 : canvasHeight - (h + 1) * 16;
const frames = levels[h];
for (let i = 0; i < frames.length; i++) {
drawFrame(frames[i], y);
@@ -208,20 +278,20 @@
}
canvas.onmousemove = function() {
const h = Math.floor((reverse ? event.offsetY : (canvasHeight - event.offsetY)) / 16);
const h = Math.floor((inverted ? event.offsetY : (canvasHeight - event.offsetY)) / 16);
if (h >= 0 && h < levels.length) {
const f = findFrame(levels[h], event.offsetX / px + root.left);
if (f) {
if (f !== root) getSelection().removeAllRanges();
hl.style.left = (Math.max(f.left - root.left, 0) * px + canvas.offsetLeft) + 'px';
hl.style.width = (Math.min(f.width, root.width) * px) + 'px';
hl.style.top = ((reverse ? h * 16 : canvasHeight - (h + 1) * 16) + canvas.offsetTop) + 'px';
hl.style.top = ((inverted ? h * 16 : canvasHeight - (h + 1) * 16) + canvas.offsetTop) + 'px';
hl.firstChild.textContent = f.title;
hl.style.display = 'block';
canvas.title = f.title + '\n(' + samples(f.width) + f.details + ', ' + pct(f.width, levels[0][0].width) + '%)';
canvas.style.cursor = 'pointer';
canvas.onclick = function() {
if (event.altKey && h >= root.level) {
if ((event.altKey || event.ctrlKey) && h >= root.level && h > 0) {
removeStack(f.left, f.width);
root.width > f.width ? render(root) : render();
} else if (f !== root) {
@@ -249,8 +319,8 @@
getSelection().selectAllChildren(hl);
}
document.getElementById('reverse').onclick = function() {
reverse = !reverse;
document.getElementById('inverted').onclick = function() {
inverted = !inverted;
render();
}
@@ -262,18 +332,51 @@
search(false);
}
// Toggle the 'dark' class on the root element, store the resulting theme
// under 'flame-theme', and re-render the current root.
document.getElementById('darkmode').onclick = function() {
    const isDark = document.documentElement.classList.toggle('dark');
    try {
        localStorage.setItem('flame-theme', isDark ? 'dark' : 'light');
    } catch (ignored) {
        // Storing the preference is best-effort; failures are ignored.
    }
    render(root);
}
// The legend popup is shown while the pointer is over the info button
// and hidden again when the pointer leaves it.
const btnInfo = document.getElementById('info');
const legend = document.getElementById('legend');

btnInfo.onmouseover = function() {
    // Place the legend 24px below and to the right of the button's offset position.
    const style = legend.style;
    style.left = (btnInfo.offsetLeft + 24) + 'px';
    style.top = (btnInfo.offsetTop + 24) + 'px';
    style.display = 'block';
}

btnInfo.onmouseout = function() {
    legend.style.display = 'none';
}
window.onkeydown = function(event) {
if ((event.ctrlKey || event.metaKey) && event.key === 'f') {
event.preventDefault();
search(true);
return false;
} else if (event.key === 'Escape') {
search(false);
} else if ((event.key === 'n' || event.key === 'N') && nav.length > 0) {
navIndex = (navIndex + (event.shiftKey ? nav.length - 1 : 1)) % nav.length;
render(nav[navIndex]);
document.getElementById('matchval').textContent = matchval + ' (' + (navIndex + 1) + ' of ' + nav.length + ')';
window.scroll(0, reverse ? root.level * 16 : canvasHeight - (root.level + 1) * 16);
window.scroll(0, inverted ? root.level * 16 : canvasHeight - (root.level + 1) * 16);
canvas.onmousemove();
return false;
} else if (event.key === 'i') {
canvas.onmouseout();
document.getElementById('inverted').onclick();
return false;
} else if (event.key === 'd') {
document.getElementById('darkmode').onclick();
return false;
} else if (event.key === '0') {
canvas.onmouseout();
root = levels[0][0];
search(false);
return false;
}
}
@@ -492,7 +595,7 @@ f(484,12,2,3)
u(476)
f(68,10,3,23)
f(412,11,2,16)
u(380,15)
f(380,12,1,15)
f(372,13,1,3)
n(388,2)
n(396)
@@ -501,7 +604,7 @@ n(428,5)
f(436,14,1,2)
n(444)
u(420)
f(452,11,3,3)
f(452,11,2,3)
u(468)
f(460,13,1,2)
f(492,11,2)
@@ -539,7 +642,7 @@ u(108)
f(1444,15,12,50)
f(1452,16,1,22)
f(124,17,2,20)
f(156,18,9,8)
f(156,18,10,8)
f(228,19,2,2)
n(508)
n(605)
@@ -552,7 +655,7 @@ u(613)
u(629)
u(621)
f(579,18,2)
f(1460,16,3,27)
f(1460,16,2,27)
f(124,17,11,16)
f(156,18,13,3)
u(605)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

132
.clang-tidy Normal file
View File

@@ -0,0 +1,132 @@
Checks: >
-*,
bugprone-assert-side-effect,
bugprone-bool-pointer-implicit-conversion,
bugprone-chained-comparison,
bugprone-copy-constructor-init,
bugprone-incorrect-roundings,
bugprone-infinite-loop,
bugprone-integer-division,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-arithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-non-zero-enum-to-bool-conversion,
bugprone-pointer-arithmetic-on-polymorphic-object,
bugprone-posix-return,
bugprone-redundant-branch-condition,
bugprone-return-const-ref-from-parameter,
bugprone-sizeof-container,
bugprone-standalone-empty,
bugprone-string-literal-with-embedded-nul,
bugprone-string-integer-assignment,
bugprone-suspicious-include,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-realloc-usage,
bugprone-suspicious-semicolon,
bugprone-suspicious-string-compare,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cppcoreguidelines-misleading-capture-default-by-value,
cppcoreguidelines-pro-type-const-cast,
cppcoreguidelines-slicing,
cert-oop58-cpp,
cert-flp30-c,
misc-confusable-identifiers,
misc-definitions-in-headers,
misc-header-include-cycle,
misc-misplaced-const,
misc-non-copyable-objects,
misc-redundant-expression,
misc-static-assert,
misc-unconventional-assign-operator,
misc-unused-alias-decls,
performance-avoid-endl,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-destructor,
performance-noexcept-move-constructor,
performance-noexcept-swap,
performance-trivially-destructible,
performance-type-promotion-in-math-fn,
performance-unnecessary-copy-initialization,
performance-unnecessary-value-param,
readability-avoid-return-with-void-value,
readability-avoid-unconditional-preprocessor-if,
readability-const-return-type,
readability-container-contains,
readability-container-data-pointer,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include,
readability-function-size,
readability-identifier-naming,
readability-misleading-indentation,
readability-misplaced-array-index,
readability-named-parameter,
readability-operators-representation,
readability-qualified-auto,
readability-redundant-access-specifiers,
readability-redundant-casting,
readability-redundant-control-flow,
readability-redundant-declaration,
readability-redundant-function-ptr-dereference,
readability-redundant-preprocessor,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-reference-to-constructed-temporary,
readability-simplify-subscript-expr,
readability-static-accessed-through-instance,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
readability-use-anyofallof,
# TODO: Consider these
# bugprone-switch-missing-default-case
# bugprone-multi-level-implicit-pointer-conversion
# bugprone-branch-clone
# cert-err33-c
# cppcoreguidelines-narrowing-conversions
# cppcoreguidelines-init-variables
# cppcoreguidelines-explicit-virtual-functions
# cppcoreguidelines-special-member-functions
# llvm-include-order
# misc-const-correctness
# modernize-*
# performance-enum-size
# readability-function-cognitive-complexity
# readability-else-after-return
# readability-convert-member-functions-to-static
# readability-math-missing-parentheses
# readability-non-const-parameter
# readability-redundant-member-init
# readability-simplify-boolean-expr
# misc-include-cleaner
# google-explicit-constructor
# cppcoreguidelines-virtual-class-destructor
# readability-make-member-function-const
HeaderFilterRegex: ".*"  # match every header; a bare "*" is not a valid regular expression
CheckOptions:
- key: readability-identifier-naming.LocalVariableCase
value: lower_case
- key: readability-identifier-naming.LocalVariableIgnoredRegexp
value: '(KB|Thread|setDaemon|klassOop|nVMs|loadLibrary|getTicksFrequency|counterTime|System|M|R|s_)'
- key: readability-identifier-naming.PrivateMemberPrefix
value: _
- key: readability-identifier-naming.ConstexprVariableCase
value: UPPER_CASE

View File

@@ -2,7 +2,6 @@
name: "🐛 Bug Report"
description: Report a bug
title: "(short issue description)"
labels: [bug]
assignees: []
body:
- type: textarea

83
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,83 @@
name: build-template
on:
workflow_call:
inputs:
platform:
type: string
required: true
runner:
type: string
required: true
container-image:
type: string
required: false
env:
build_java_distribution: corretto
build_java_version: 11
permissions:
contents: read
jobs:
build:
runs-on: ${{ inputs.runner }}
container:
image: ${{ inputs.container-image && format('public.ecr.aws/async-profiler/asprof-builder-{0}', inputs.container-image) || '' }}
name: "build and unit test (${{ inputs.platform }})"
steps:
- name: Run container setup
if: inputs.container-image != ''
run: "[ ! -f /root/setup.sh ] || /root/setup.sh"
- name: Setup Java
uses: actions/setup-java@v4
with:
distribution: ${{ env.build_java_distribution }}
java-version: ${{ env.build_java_version }}
- name: Checkout sources
uses: actions/checkout@v4
- name: Build and unit test
id: build
run: |
set -x
HASH=${GITHUB_SHA:0:7}
case "${{ inputs.platform }}" in
macos*)
brew install gcovr
make COMMIT_TAG=$HASH FAT_BINARY=true release coverage -j
;;
*)
make COMMIT_TAG=$HASH CC=/usr/local/musl/bin/musl-gcc release coverage -j
echo "debug_archive=$(find . -type f -name "async-profiler-*-debug*" -exec basename {} \;)" >> $GITHUB_OUTPUT
;;
esac
echo "archive=$(find . -type f -name "async-profiler-*" -not -name "*-debug*" -exec basename {} \;)" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Set artifact name
id: set_artifact_name
run: echo "artifact_name=async-profiler-${{ inputs.platform }}-${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Upload binaries
uses: actions/upload-artifact@v4
with:
name: ${{ steps.set_artifact_name.outputs.artifact_name }}
path: ${{ steps.build.outputs.archive }}
if-no-files-found: error
- name: Upload debug info
uses: actions/upload-artifact@v4
if: inputs.platform != 'macos'
with:
name: ${{ steps.set_artifact_name.outputs.artifact_name }}-debug
path: ${{ steps.build.outputs.debug_archive }}
if-no-files-found: error
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: test-coverage-${{ inputs.platform }}
path: build/test/coverage/
if-no-files-found: error

48
.github/workflows/clang-tidy-review.yml vendored Normal file
View File

@@ -0,0 +1,48 @@
name: clang-tidy-review
on:
workflow_run:
workflows:
- code-check
types:
- completed
jobs:
clang-tidy-results:
if: ${{ github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' }}
runs-on: ubuntu-latest
container:
image: "public.ecr.aws/async-profiler/asprof-code-check:latest"
permissions:
pull-requests: write
contents: write
actions: read
steps:
- name: Download code-check artifacts
uses: actions/download-artifact@v4
with:
run-id: ${{ github.event.workflow_run.id }}
github-token: ${{ secrets.GITHUB_TOKEN }}
name: code-check-artifacts
path: /tmp/code-check-artifacts/
- name: Read PR information
id: pr_info
run: |
cd /tmp/code-check-artifacts
echo "pr_id=$(cat pr-id.txt)" >> "$GITHUB_OUTPUT"
echo "pr_head_repo=$(cat pr-head-repo.txt)" >> "$GITHUB_OUTPUT"
echo "pr_head_sha=$(cat pr-head-sha.txt)" >> "$GITHUB_OUTPUT"
- uses: actions/checkout@v4
with:
repository: ${{ steps.pr_info.outputs.pr_head_repo }}
ref: ${{ steps.pr_info.outputs.pr_head_sha }}
persist-credentials: false
- name: Run clang-tidy-pr-comments action
uses: platisd/clang-tidy-pr-comments@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
clang_tidy_fixes: /tmp/code-check-artifacts/clang-tidy-fixes.yml
pull_request_id: ${{ steps.pr_info.outputs.pr_id }}
python_path: python
auto_resolve_conversations: true
suggestions_per_comment: 100

49
.github/workflows/code-check.yml vendored Normal file
View File

@@ -0,0 +1,49 @@
name: code-check
on:
- pull_request
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
cpp-lint:
runs-on: ubuntu-latest
container:
image: "public.ecr.aws/async-profiler/asprof-code-check:latest"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 0
- name: Mark repo as safe for Git
run: git config --global --add safe.directory $GITHUB_WORKSPACE
- name: Fetch base branch
run: |
git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}"
git fetch --no-tags --no-recurse-submodules upstream "${{ github.event.pull_request.base.ref }}"
- name: Create artifacts directory
run: |
mkdir code-check-artifacts/
echo "${{ github.event.number }}" > code-check-artifacts/pull-request-id.txt
- name: Run clang-tidy
run: |
set -o pipefail  # plain `set pipefail` only assigns "pipefail" to $1 and enables nothing
make cpp-lint-diff \
DIFF_BASE="$(git merge-base HEAD "upstream/${{ github.event.pull_request.base.ref }}")" \
CLANG_TIDY_ARGS_EXTRA="-export-fixes code-check-artifacts/clang-tidy-fixes.yml"
shell: bash
- name: Save PR information
run: |
echo "${{ github.event.number }}" > code-check-artifacts/pr-id.txt
echo "${{ github.event.pull_request.head.repo.full_name }}" > code-check-artifacts/pr-head-repo.txt
echo "${{ github.event.pull_request.head.sha }}" > code-check-artifacts/pr-head-sha.txt
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: code-check-artifacts
path: code-check-artifacts/

119
.github/workflows/integ.yml vendored Normal file
View File

@@ -0,0 +1,119 @@
name: integration-test-template
on:
workflow_call:
inputs:
test-platform:
type: string
required: true
platform:
type: string
required: true
architecture:
type: string
required: false
java-version:
type: string
required: true
java-distribution:
type: string
required: false
default: "corretto"
runner:
type: string
required: true
container-image:
type: string
required: false
container-volumes:
type: string
required: false
use-builtin-jdk:
type: boolean
required: false
default: false
retry-count:
type: number
required: false
default: 0
permissions:
contents: read
jobs:
integration-test:
runs-on: ${{ inputs.runner }}
container:
image: ${{ inputs.container-image && format('public.ecr.aws/async-profiler/asprof-builder-{0}', inputs.container-image) || '' }}
options: --privileged
volumes: ${{ fromJSON(inputs.container-volumes || '[]') }}
name: "${{ inputs.test-platform }}, ${{ inputs.java-distribution }} ${{ inputs.java-version }}"
steps:
- name: Run container setup
if: inputs.container-image != ''
run: "[ ! -f /root/setup.sh ] || /root/setup.sh"
- name: Setup Java
uses: actions/setup-java@v4
# https://github.com/actions/setup-java/issues/678#issuecomment-2446279753
if: ${{ !inputs.use-builtin-jdk }}
with:
distribution: ${{ inputs.java-distribution }}
java-version: ${{ inputs.java-version }}
architecture: ${{ inputs.architecture }}
- name: Checkout sources
uses: actions/checkout@v4
- name: Set variables
id: set_variables
run: |
echo "short_sha=${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
echo "artifact_name=async-profiler-${{ inputs.platform }}-${GITHUB_SHA:0:7}" >> $GITHUB_OUTPUT
shell: bash
env:
GITHUB_SHA: ${{ github.sha }}
- name: Download async-profiler release artifact
uses: actions/download-artifact@v4
with:
name: ${{ steps.set_variables.outputs.artifact_name }}
path: async_profiler_release
- name: Download async-profiler JAR artifacts
uses: actions/download-artifact@v4
with:
name: async-profiler-jars
path: jar_artifacts
- name: Extract async-profiler artifact
id: extract_artifact
run: |
release_archive=$(basename $(find async_profiler_release -type f -iname "async-profiler-*" ))
case "${{ inputs.runner }}" in
macos*)
unzip async_profiler_release/$release_archive
;;
*)
tar xvf async_profiler_release/$release_archive
;;
esac
echo "jars_directory=jar_artifacts" >> $GITHUB_OUTPUT
echo "release_directory=$(basename $(find . -type d -iname "async-profiler-*" ))" >> $GITHUB_OUTPUT
- name: Download Protobuf Java runtime
run: |
mkdir -p test/deps
cd test/deps
curl -L -O "https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/$PB_JAVA_VERSION/protobuf-java-$PB_JAVA_VERSION.jar"
env:
PB_JAVA_VERSION: "4.31.1"
- name: Run integration tests
run: |
mkdir -p build/jar
cp ${{ steps.extract_artifact.outputs.jars_directory }}/* build/jar
make build/test.jar
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/bin build
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/lib build
make test-java TEST_THREADS=2 RETRY_COUNT=${{ inputs.retry-count }} -j
- name: Upload integration test logs
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-logs-${{ inputs.test-platform }}-${{ inputs.java-version }}-${{ steps.set_variables.outputs.short_sha }}
path: |
build/test/logs/
hs_err*.log

View File

@@ -4,6 +4,13 @@ on:
- push
- pull_request
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
license-header:
runs-on: ubuntu-latest
@@ -19,3 +26,34 @@ jobs:
run: |
npm install -g prettier@3.4.2
make check-md
eof-newline:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: EOF newline check
env:
offenders_path: /tmp/eof_newline_offenders.txt
run: |
find . -path './.git' -prune -o -exec file --mime-type {} + | grep 'text/' | awk -F: '{print $1}' | while read -r file; do
# Read last byte and verify it's a newline
if [ -s "$file" ] && [ "$(tail -c1 "$file" | wc -l)" -eq 0 ]; then
echo "$file" >> "$offenders_path"
fi
done
if [ -s "$offenders_path" ]; then
cat "$offenders_path"
exit 1
fi
trailing-spaces:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Trailing spaces check
env:
offenders_path: /tmp/trailing_space_offenders.txt
run: |
grep -rIlE --exclude-dir=.git '[[:blank:]]+$' . > "$offenders_path" || true
if [ -s "$offenders_path" ]; then
cat "$offenders_path"
exit 1
fi

View File

@@ -1,144 +1,158 @@
name: Test and Publish Nightly Builds
name: CI
on: # We are very liberal in terms of triggering builds. This should be revisited if we start seeing a lot of queueing
- push
- pull_request
- workflow_dispatch
env:
RELEASE_FROM_JAVA_VERSION: '11'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
build-jars:
runs-on: ubuntu-latest
name: build / jars
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Build jars
- name: Build JARs
run: make jar
- name: Upload async-profiler.jar
- name: Upload JARs
uses: actions/upload-artifact@v4
with:
name: async-profiler.jar
path: build/jar/async-profiler.jar
- name: Upload jfr-converter.jar
uses: actions/upload-artifact@v4
with:
name: jfr-converter.jar
path: build/jar/jfr-converter.jar
build-binaries-and-test:
name: async-profiler-jars
path: build/jar/*
if-no-files-found: error
build-linux-arm64:
name: build / linux-arm64
uses: ./.github/workflows/build.yml
with:
platform: linux-arm64
runner: ubuntu-24.04-arm
container-image: "arm:latest"
build-linux-x64:
name: build / linux-x64
uses: ./.github/workflows/build.yml
with:
platform: linux-x64
runner: ubuntu-latest
container-image: x86:latest
build-macos:
name: build / macos
uses: ./.github/workflows/build.yml
with:
platform: macos
runner: macos-15
integ-linux-x64:
name: integ / linux-x64
needs: [build-linux-x64, build-jars]
strategy:
fail-fast: false
matrix:
runson:
- display: linux-x64
name: ubuntu-latest # Using "latest" here as the build and test will any ways run inside a container which we control
java-version: [11, 17, 21, 24]
test-platform: [linux-x64]
java-version: [8, 11, 17, 21, 25]
java-distribution: [corretto]
container: ["public.ecr.aws/async-profiler/asprof-builder-x86:latest"]
container-image: [x86:latest]
include:
- runson:
display: macos-14-arm64
name: macos-14
- test-platform: linux-x64-alpine
container-image: alpine:corretto-11
use-builtin-jdk: true
java-distribution: corretto
java-version: 11
- test-platform: linux-x64-AL2
container-image: amazonlinux:2
# GHA provides Node.js by attaching a volume to the container. The container path is
# '/__e/node20', and it's not writable unless we override it via 'container.volumes'.
container-volumes: '["/tmp/node20:/__e/node20"]'
java-version: 11
java-distribution: corretto
container: "" # Not using container for mac-os as we have images only for linux
- runson:
display: linux-arm64
name: ubuntu-24.04-arm # There is no "latest" tag available (yet) as ARM runners are still in public preview
- test-platform: linux-x64-AL2023
container-image: amazonlinux:2023
java-version: 11
java-distribution: corretto
container: "public.ecr.aws/async-profiler/asprof-builder-arm:latest"
runs-on: ${{ matrix.runson.name }}
container:
image: ${{ matrix.container }}
options: --privileged
name: "test (${{ matrix.runson.display }}, ${{ matrix.java-distribution }} ${{ matrix.java-version }})"
steps:
- name: Setup Java for macOS x64
uses: actions/setup-java@v4
if: ${{ matrix.runson.name == 'macos-14' }}
with:
distribution: ${{ matrix.java-distribution }}
java-version: ${{ matrix.java-version }}
architecture: x64 # set up for x64, as the default arm one will override this later
- name: Setup Java for default architecture
uses: actions/setup-java@v4
with:
distribution: ${{ matrix.java-distribution }}
java-version: ${{ matrix.java-version }}
# architecture: not specifying this defaults to architecture of the runner
- name: Checkout sources
uses: actions/checkout@v4
- name: Build and test
id: build
run: |
HASH=$(echo ${{ github.sha }} | cut -c-7)
case "${{ matrix.runson.name }}" in
macos*)
make COMMIT_TAG=$HASH FAT_BINARY=true release test -j
;;
*)
make COMMIT_TAG=$HASH CC=/usr/local/musl/bin/musl-gcc release test -j
;;
esac
echo "archive=$(basename $(find . -type f -iname "async-profiler-*" ))" >> $GITHUB_OUTPUT
- name: Coverage
id: coverage
run: |
HASH=$(echo ${{ github.sha }} | cut -c-7)
case "${{ matrix.runson.name }}" in
macos*)
brew install gcovr
make COMMIT_TAG=$HASH FAT_BINARY=true coverage -j
;;
*)
make COMMIT_TAG=$HASH CC=/usr/local/musl/bin/musl-gcc coverage -j
;;
esac
- name: Upload test logs for default architecture
uses: actions/upload-artifact@v4
- test-platform: linux-x64-alpaquita
container-image: alpaquita:x86_64-liberica-21
use-builtin-jdk: true
java-distribution: liberica
java-version: 21
uses: ./.github/workflows/integ.yml
with:
platform: linux-x64
test-platform: ${{ matrix.test-platform }}
runner: ubuntu-latest
container-image: ${{ matrix.container-image }}
container-volumes: ${{ matrix.container-volumes || '' }}
java-version: ${{ matrix.java-version }}
java-distribution: ${{ matrix.java-distribution }}
use-builtin-jdk: ${{ matrix.use-builtin-jdk || false }}
if: always() # we always want to upload test logs, especially when tests fail
with:
name: test-logs-${{ matrix.runson.display }}-${{ matrix.java-version }}
path: |
build/test/logs/
hs_err*.log
- name: Upload coverage report
uses: actions/upload-artifact@v4
integ-linux-arm64:
name: integ / linux-arm64
needs: [build-linux-arm64, build-jars]
strategy:
fail-fast: false
matrix:
test-platform: [linux-arm64]
java-version: [8, 11, 17, 21, 25]
java-distribution: [corretto]
container-image: [arm:latest]
uses: ./.github/workflows/integ.yml
with:
platform: linux-arm64
test-platform: ${{ matrix.test-platform }}
runner: ubuntu-24.04-arm
container-image: ${{ matrix.container-image }}
container-volumes: ${{ matrix.container-volumes || '' }}
java-version: ${{ matrix.java-version }}
java-distribution: ${{ matrix.java-distribution }}
with:
name: test-coverage-${{ matrix.runson.display }}-${{ matrix.java-version }}
path: build/test/coverage/
- name: Test macOS x64
if: ${{ matrix.runson.name == 'macos-14' }}
run: JAVA_HOME=$JAVA_HOME_${{ env.JAVA_VERSION }}_X64 make test
integ-macos:
name: integ / macos
needs: [build-macos, build-jars]
strategy:
fail-fast: false
matrix:
include:
- runner: macos-15
test-platform: macos-arm64
java-version: "11"
- runner: macos-15
test-platform: macos-arm64
java-version: "21"
- runner: macos-15-intel
test-platform: macos-x64
java-version: "17"
architecture: x64
retry-count: 1
uses: ./.github/workflows/integ.yml
with:
platform: macos
test-platform: ${{ matrix.test-platform }}
runner: ${{ matrix.runner }}
java-version: ${{ matrix.java-version }}
architecture: ${{ matrix.architecture || '' }}
retry-count: ${{ matrix.retry-count || 0 }}
- name: Upload async-profiler binaries to workflow
uses: actions/upload-artifact@v4
if: env.RELEASE_FROM_JAVA_VERSION == matrix.java-version
with:
name: ${{ steps.build.outputs.archive }}
path: ${{ steps.build.outputs.archive }}
- name: Upload test logs for macOS x64
uses: actions/upload-artifact@v4
if: matrix.runson.name == 'macos-14' && always()
with:
name: test-logs-macos-14-x64-${{ matrix.java-version }}
path: |
build/test/logs/
hs_err*.log
publish-only-on-push:
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
permissions:
contents: write
name: publish (nightly)
runs-on: ubuntu-latest
needs: [build-jars, build-binaries-and-test]
needs: [integ-linux-x64, integ-linux-arm64, integ-macos]
steps:
- name: Download async-profiler binaries and jars
uses: actions/download-artifact@v4
with:
pattern: '*.*' # download everything except test logs
pattern: 'async-profiler-*'
merge-multiple: 'true'
- name: Delete previous release and publish new release
uses: actions/github-script@v7

View File

@@ -14,6 +14,8 @@ header:
paths-ignore:
- 'src/jattach'
- 'src/res'
- '**/MANIFEST.MF'
- 'test/**/*.collapsed'
license:
content: |
Copyright The async-profiler authors

View File

@@ -1,5 +1,180 @@
# Changelog
## [4.4]
### Features
- #1553: Differential Flame Graphs
### Improvements
- #1705: `memlimit` option to limit size of the call trace storage
- #1706: Extend syntax of `-j` option to truncate deep stacks
- #1720: FlameGraph: Dark mode toggle
- #1672: FlameGraph: Use Ctrl+Click in addition to Alt+Click to remove stacks
- #1684: Unwind ARM64 generated stubs on JDK 26+
- #1676: Make `dwarf` stack walking mode an alias for `vm`
- #1671: An option to select TLAB based AllocTracer engine with JDK 11+
- #1670: Move converter Main class to the one.convert package
- #1660: Provide non-aggregated samples in OTLP converter
- #1701, #1682: Speed-up stack walking
### Breaking changes
- #1673: Permanently remove `check` command
- #1675: Remove unsafe AsyncGetCallTrace recovery tricks along with `safemode` option
- #1677: Remove `cstack=lbr` option
### Bug fixes
- #1727: Allocation profile has wrong units in OTLP format
- #1716: Wall-clock Heatmap does not count samples correctly
- #1715: Fix Zing crash when profiling cpu+wall together
- #1708: Another fix for correct vDSO unwinding on ARM64
- #1707: Workaround for JFR shutdown race
- #1699: Allow negative keys in JFR constant pool
- #1697: Ensure remaining buffer is sufficient for event data in JfrReader
- #1657: Re-enable workaround for a long attach on JDK 8
- #1654: Prefer perf-events engine when record-cpu or target-cpu are selected
- #1585: Scale perf counters in case of multiplexing
- #1528: Add a hard-coded limit on the maximum number of jmethodIDs
- #1203: Fix "Instance field not found" when using `-Xcheck:jni` on JDK 8
- Do not walk past virtual thread continuation barriers
## [4.3] - 2026-01-20
### Features
- #1547: Native lock profiling
- #1566: Filter cpu/wall profiles by latency
- #1568: Expose async-profiler metrics in Prometheus format
- #1628: async-profiler.jar as Java agent; remote control via JMX
### Improvements
- #1140: FlameGraph improvements: legend, hot keys, new toolbar icons
- #1530: Timezone switcher between Local and UTC time in Heatmaps
- #1582: Support `--include`/`--exclude` options for JFR to Heatmap/OTLP/pprof conversion
- #1624: Compatibility with OTLP v1.9.0
- #1629: Harden crash protection in StackWalker
### Breaking changes
- #1277: New `timeSpan` field in WallClockSample events
- #1518: Deprecate `check` command
- #1590: Support compilation on modern JDKs. Drop JDK 7 support
### Bug fixes
- #1599: Workaround for the kernel PERF_EVENT_IOC_REFRESH bug
- #1596: Do not block any signals during execution of a custom crash handler
- #1584: JfrReader loops on corrupted recordings
- #1555: Parse FlameGraph title from HTML input
- #1621: `loop` and `timeout` options do not work together
- #1641: Unwind vDSO correctly on Linux-ARM64
- #1648: Fix stop sequence in Profiler::start
- #1575: Fix CodeCache memory leak in lock profiling while looping
- #1558: Fix record-cpu bug when kernel stacks are not available
- #1651: Do not record CPU frame for non-perf samples
- #1614, #1615, #1617, #1623: Fix races related to VM termination
## [4.2.1] - 2025-11-22
### Bug fixes
- #1599: Workaround for the kernel PERF_EVENT_IOC_REFRESH bug
- #1596: Do not block any signals during execution of a custom crash handler
## [4.2] - 2025-10-20
### Features
- Java Method Tracing and Latency Profiling
* #1421: Latency profiling
* #1435: Allow wildcards in Instrument profiling engine
* #1499: `--trace` option with per-method latency threshold
- System-wide process sampling on Linux
* #1411: `--proc` option to record `profiler.ProcessSample` events
- VMStructs stack walker by default
* #1539: Use VMStructs stack walking mode by default
* #1537: Support `comptask` and `vtable` features
* #1517: Use JavaFrameAnchor to find top Java frame
* #1449: Special handling of prologue and epilogue of compiled methods
### Improvements
- #1475: Add `CPUTimeSample` event support to jfrconv
- #1414: Per-thread flamegraph option in JFR heatmap converter
- #1526: Expose JfrReader dictionary that maps osThreadId to javaThreadId
- #1448: Thread name in OpenTelemetry output
- #1413: Add `time_nanos` and `duration_nanos` to OTLP profiles
- #1450: Unwind dylib stubs as empty frames on macOS
- #1416: Add synthetic symbols for Mach-O stubs/trampolines
- Allow cross-compilation for 32-bit platforms
### Bug fixes
- #1515: Fix UnsatisfiedLinkError when tmpdir is set to a relative path
- #1500: Detect if `calloc` calls `malloc` for nativemem profiling
- #1427: Re-implement SafeAccess crash protection
- #1417: Two wall-clock profilers interfere with each other
### Project Infrastructure
- #1527: GHA: replace macos-13 with macos-15-intel
- #1510: Add option to retry tests
- #1508: Add more GHA jobs to cover JDK versions on ARM
- #1502: Fix job dependencies between integration tests and builds
- #1466: Add Liberica JDK on Alpaquita Linux to the CI
- Made integration tests more stable overall
## [4.1] - 2025-07-21
### Features
- Experimental support for the OpenTelemetry profiling signal
* #1188: OTLP output format and `dumpOtlp` Java API
* #1336: JFR to OTLP converter
- JDK 25 support
* #1222: Update VMStructs for JDK 25
- Productize native memory profiling
* #1193: Full `nativemem` support on macOS
* #1254: Fixed Nativemem tests on Alpine
* #1269: Native memory profiling now works with `jemalloc`
* #1323: `nativemem` shows allocations inside async-profiler itself
### Improvements
- #1174: Detect JVM in non-Java application and attach to it
- #1223: Native API to add custom events in JFR recording
- #1259: `--all` option to collect all possible events simultaneously
- #1286: Record which CPU a sample was taken on
- #1299: Skip last 10% allocations for leak detection
- #1300: Allow profiling kprobes/uprobes with `--fdtransfer`
- #1366: Rewrite `jfrconv` executable to shell
- #1400: Unwind checksum and digest intrinsics on ARM64
- #1357, #1389: VMStructs-based stack unwinding for `alloc` and `nativemem` profiling
### Bug fixes
- #1251: `--ttsp` option does not work on Alpine
- #1264: Guard hook installation with dlopen/dlclose
- #1319: SIGSEGV in PerfEvents::walk
- #1350: Disable JFR OldObjectSample event in jfrsync mode
- #1358: Do not dereference jmethodIDs on JDK 26
- #1374: Correctly check if profiler is preloaded
- #1380: Workaround clang type promotion bug
- #1387: JFR writer crashes when using cstack=vmx
- #1393: Improve stack walking termination logic: no endless `unknown` frames
- Stack unwinding fixes for ARM64
### Project Infrastructure
- #1129: Command-line option to filter tests
- #1262: Include `asprof.h` in async-profiler release package
- #1271: Release additional binaries with debug symbols
- #1274: Add Corretto 8 to the test matrix
- #1246, #1226: Run tests on Amazon Linux and Alpine Linux
- #1360: Auto-generated clang-tidy review comments
- #1373: Save all generated test logs for debug purposes
- Fixed flaky tests (#1282, #1307, #1376)
## [4.0] - 2025-04-08
### Features
@@ -331,7 +506,7 @@
### Features
- Converters between different output formats:
- JFR -> nflx (FlameScope)
- Collapsed stacks -> HTML 5 Flame Graph
- Collapsed stacks -> HTML 5 Flame Graph
### Improvements
- `profiler.sh` no longer requires bash (contributed by @cfstras)
@@ -415,7 +590,7 @@
### Features
- Interactive Call tree and Backtrace tree in HTML format (contributed by @rpulle)
- Experimental support for Java Flight Recorder (JFR) compatible output
### Improvements
- Added units: `ms`, `us`, `s` and multipliers: `K`, `M`, `G` for interval argument
- API and command-line option `-v` for profiler version
@@ -429,7 +604,7 @@
### Features
- Profiling of native functions, e.g. malloc
### Improvements
- JDK 9, 10, 11 support for heap profiling with accurate stack traces
- `root` can now profile Java processes of any user
@@ -441,7 +616,7 @@
- Produce SVG files out of the box; flamegraph.pl is no longer needed
- Profile ReentrantLock contention
- Java API
### Improvements
- Allocation and Lock profiler now works on JDK 7, too
- Faster dumping of results

View File

@@ -8,7 +8,7 @@ information to effectively respond to your bug report or contribution.
## Security issue notifications
If you discover a potential security issue in this project we ask that you notify our [Security Team](mailto:security@profiler.tools). Please do **not** create a public GitHub issue.
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
## Reporting Bugs/Feature Requests

168
Makefile
View File

@@ -1,4 +1,4 @@
PROFILER_VERSION ?= 4.0
PROFILER_VERSION ?= 4.4
ifeq ($(COMMIT_TAG),true)
PROFILER_VERSION := $(PROFILER_VERSION)-$(shell git rev-parse --short=8 HEAD)
@@ -6,12 +6,18 @@ else ifneq ($(COMMIT_TAG),)
PROFILER_VERSION := $(PROFILER_VERSION)-$(COMMIT_TAG)
endif
TMP_DIR=/tmp
COMMA=,
PACKAGE_NAME=async-profiler-$(PROFILER_VERSION)-$(OS_TAG)-$(ARCH_TAG)
PACKAGE_DIR=/tmp/$(PACKAGE_NAME)
PACKAGE_DIR=$(TMP_DIR)/$(PACKAGE_NAME)
DEBUG_PACKAGE_NAME=$(PACKAGE_NAME)-debug
DEBUG_PACKAGE_DIR=$(PACKAGE_DIR)-debug
ASPROF=bin/asprof
JFRCONV=bin/jfrconv
LIB_PROFILER=lib/libasyncProfiler.$(SOEXT)
LIB_PROFILER_DEBUG=libasyncProfiler.$(SOEXT).debug
ASPROF_HEADER=include/asprof.h
API_JAR=jar/async-profiler.jar
CONVERTER_JAR=jar/jfr-converter.jar
TEST_JAR=test.jar
@@ -19,11 +25,15 @@ TEST_JAR=test.jar
CC ?= gcc
CXX ?= g++
STRIP ?= strip
OBJCOPY ?= objcopy
ifneq ($(CROSS_COMPILE),)
CC := $(CROSS_COMPILE)gcc
CXX := $(CROSS_COMPILE)g++
AS := $(CROSS_COMPILE)as
LD := $(CROSS_COMPILE)ld
STRIP := $(CROSS_COMPILE)strip
OBJCOPY := $(CROSS_COMPILE)objcopy
endif
CFLAGS_EXTRA ?=
@@ -42,13 +52,18 @@ JAR=$(JAVA_HOME)/bin/jar
JAVA=$(JAVA_HOME)/bin/java
JAVA_TARGET=8
JAVAC_OPTIONS=--release $(JAVA_TARGET) -Xlint:-options
TEST_JAVA ?= $(JAVA_HOME)/bin/java
TEST_LIB_DIR=build/test/lib
TEST_BIN_DIR=build/test/bin
TEST_DEPS_DIR=test/deps
TEST_GEN_DIR=test/gen
LOG_DIR=build/test/logs
LOG_LEVEL=
SKIP=
TEST_FLAGS=-DlogDir=$(LOG_DIR) -DlogLevel=$(LOG_LEVEL) -Dskip=$(SKIP)
RETRY_COUNT=0
TEST_THREADS ?= 8
TEST_FLAGS=-DlogDir=$(LOG_DIR) -DlogLevel=$(LOG_LEVEL) -Dskip='$(subst $(COMMA), ,$(SKIP))' -DretryCount=$(RETRY_COUNT) -DthreadCount=$(TEST_THREADS)
# always sort SOURCES so zInit is last.
SOURCES := $(sort $(wildcard src/*.cpp))
@@ -56,12 +71,15 @@ HEADERS := $(wildcard src/*.h)
RESOURCES := $(wildcard src/res/*)
JAVA_HELPER_CLASSES := $(wildcard src/helper/one/profiler/*.class)
API_SOURCES := $(wildcard src/api/one/profiler/*.java)
JAR_MANIFEST := src/api/one/profiler/MANIFEST.MF
CONVERTER_SOURCES := $(shell find src/converter -name '*.java')
TEST_SOURCES := $(shell find test -name '*.java')
TESTS ?= $(notdir $(patsubst %/,%,$(wildcard test/test/*/)))
TEST_SOURCES := $(shell find test -name '*.java' ! -path 'test/stubs/*')
TESTS ?=
CPP_TEST_SOURCES := test/native/testRunner.cpp $(shell find test/native -name '*Test.cpp')
CPP_TEST_HEADER := test/native/testRunner.hpp
CPP_TEST_INCLUDES := -Isrc -Itest/native
TEST_LIB_SOURCES := $(wildcard test/native/libs/*)
TEST_BIN_SOURCES := $(shell find test/test -name "*.c*")
ifeq ($(JAVA_HOME),)
JAVA_HOME:=$(shell java -cp . JavaHome)
@@ -82,7 +100,8 @@ ifeq ($(OS),Darwin)
MERGE=false
endif
else
CXXFLAGS += -U_FORTIFY_SOURCE -Wl,-z,defs -Wl,--exclude-libs,ALL -static-libstdc++ -static-libgcc -fdata-sections -ffunction-sections -Wl,--gc-sections
CXXFLAGS += -U_FORTIFY_SOURCE -Wl,-z,defs -Wl,--exclude-libs,ALL -static-libstdc++ -static-libgcc
CXXFLAGS += -fdata-sections -ffunction-sections -Wl,--gc-sections -ggdb -Wunused-variable -Wno-psabi
ifeq ($(MERGE),true)
CXXFLAGS += -fwhole-program
endif
@@ -119,14 +138,9 @@ ifneq (,$(STATIC_BINARY))
CFLAGS += -static -fdata-sections -ffunction-sections -Wl,--gc-sections
endif
ifneq (,$(findstring $(ARCH_TAG),x86 x64 arm64))
CXXFLAGS += -momit-leaf-frame-pointer
endif
.PHONY: all jar release build-test test clean coverage clean-coverage build-test-java build-test-cpp test-cpp test-java check-md format-md
.PHONY: all jar release build-test test native clean coverage clean-coverage build-test-java build-test-cpp build-test-libs build-test-bins test-cpp test-java check-md format-md
all: build/bin build/lib build/$(LIB_PROFILER) build/$(ASPROF) jar build/$(JFRCONV)
all: build/bin build/lib build/$(LIB_PROFILER) build/$(ASPROF) jar build/$(JFRCONV) build/$(ASPROF_HEADER)
jar: build/jar build/$(API_JAR) build/$(CONVERTER_JAR)
@@ -137,20 +151,29 @@ $(PACKAGE_NAME).tar.gz: $(PACKAGE_DIR)
tar czf $@ -C $(PACKAGE_DIR)/.. $(PACKAGE_NAME)
rm -r $(PACKAGE_DIR)
tar czf $(DEBUG_PACKAGE_NAME).tar.gz -C $(DEBUG_PACKAGE_DIR)/.. $(DEBUG_PACKAGE_NAME)
rm -r $(DEBUG_PACKAGE_DIR)
$(PACKAGE_NAME).zip: $(PACKAGE_DIR)
truncate -cs -`stat -f "%z" build/$(CONVERTER_JAR)` $(PACKAGE_DIR)/$(JFRCONV)
ifneq ($(GITHUB_ACTIONS), true)
codesign -s "Developer ID" -o runtime --timestamp -v $(PACKAGE_DIR)/$(ASPROF) $(PACKAGE_DIR)/$(JFRCONV) $(PACKAGE_DIR)/$(LIB_PROFILER)
endif
cat build/$(CONVERTER_JAR) >> $(PACKAGE_DIR)/$(JFRCONV)
ditto -c -k --keepParent $(PACKAGE_DIR) $@
rm -r $(PACKAGE_DIR)
$(PACKAGE_DIR): all LICENSE README.md
mkdir -p $(PACKAGE_DIR)
cp -RP build/bin build/lib LICENSE README.md $(PACKAGE_DIR)/
rm -rf $@
mkdir -p $(PACKAGE_DIR) $(DEBUG_PACKAGE_DIR)
cp -RP build/bin build/lib build/include LICENSE README.md $(PACKAGE_DIR)/
chmod -R 755 $(PACKAGE_DIR)
chmod 644 $(PACKAGE_DIR)/lib/* $(PACKAGE_DIR)/LICENSE $(PACKAGE_DIR)/README.md
chmod 644 $(PACKAGE_DIR)/lib/* $(PACKAGE_DIR)/include/* $(PACKAGE_DIR)/LICENSE $(PACKAGE_DIR)/README.md
ifeq ($(OS_TAG),linux)
$(STRIP) --only-keep-debug build/$(LIB_PROFILER) -o $(DEBUG_PACKAGE_DIR)/$(LIB_PROFILER_DEBUG)
$(STRIP) -g $@/$(LIB_PROFILER)
$(OBJCOPY) --add-gnu-debuglink=$(DEBUG_PACKAGE_DIR)/$(LIB_PROFILER_DEBUG) $@/$(LIB_PROFILER)
chmod 644 $(DEBUG_PACKAGE_DIR)/*
endif
build/%:
mkdir -p $@
@@ -159,9 +182,9 @@ build/$(ASPROF): src/main/* src/jattach/* src/fdtransfer.h
$(CC) $(CPPFLAGS) $(CFLAGS) $(DEFS) -o $@ src/main/*.cpp src/jattach/*.c
$(STRIP) $@
build/$(JFRCONV): src/launcher/* build/$(CONVERTER_JAR)
$(CC) $(CPPFLAGS) $(CFLAGS) $(DEFS) -o $@ src/launcher/*.cpp
$(STRIP) $@
build/$(JFRCONV): src/launcher/launcher.sh build/$(CONVERTER_JAR)
sed -e 's/PROFILER_VERSION/$(PROFILER_VERSION)/g' -e 's/BUILD_DATE/$(shell date "+%b %d %Y")/g' src/launcher/launcher.sh > $@
chmod +x $@
cat build/$(CONVERTER_JAR) >> $@
build/$(LIB_PROFILER): $(SOURCES) $(HEADERS) $(RESOURCES) $(JAVA_HELPER_CLASSES)
@@ -172,20 +195,24 @@ else
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) -fPIC -shared -o $@ $(SOURCES) $(LIBS)
endif
build/$(API_JAR): $(API_SOURCES)
build/$(ASPROF_HEADER): src/asprof.h
mkdir -p build/include
cp -f $< build/include
build/$(API_JAR): $(API_SOURCES) $(JAR_MANIFEST)
mkdir -p build/api
$(JAVAC) $(JAVAC_OPTIONS) -d build/api $(API_SOURCES)
$(JAR) cf $@ -C build/api .
$(JAR) cfm $@ $(JAR_MANIFEST) -C build/api .
$(RM) -r build/api
build/$(CONVERTER_JAR): $(CONVERTER_SOURCES) $(RESOURCES)
mkdir -p build/converter
$(JAVAC) $(JAVAC_OPTIONS) -d build/converter $(CONVERTER_SOURCES)
$(JAR) cfe $@ Main -C build/converter . -C src/res .
$(JAR) cfe $@ one.convert.Main -C build/converter . -C src/res .
$(RM) -r build/converter
%.class: %.java
$(JAVAC) -source 7 -target 7 -Xlint:-options -g:none $^
$(JAVAC) -source $(JAVA_TARGET) -target $(JAVA_TARGET) -Xlint:-options -g:none $^
build/test/cpptests: $(CPP_TEST_SOURCES) $(CPP_TEST_HEADER) $(SOURCES) $(HEADERS) $(RESOURCES) $(JAVA_HELPER_CLASSES)
mkdir -p build/test
@@ -196,38 +223,51 @@ else
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFS) $(INCLUDES) $(CPP_TEST_INCLUDES) -fPIC -o $@ $(SOURCES) $(CPP_TEST_SOURCES) $(LIBS)
endif
build-test-java: all build/$(TEST_JAR) build-test-libs build-test-bins
build-test-java: all build/$(TEST_JAR) build/test/build-test-libs build/test/build-test-bins
build-test-cpp: build/test/cpptests build-test-libs
build-test-cpp: build/test/cpptests build/test/build-test-libs
build-test: build-test-cpp build-test-java
build-test-libs:
build/test/build-test-libs: $(TEST_LIB_SOURCES)
@mkdir -p $(TEST_LIB_DIR)
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libcallsmalloc.$(SOEXT) test/native/libs/callsmalloc.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjnimalloc.$(SOEXT) test/native/libs/jnimalloc.c
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libmalloc.$(SOEXT) test/native/libs/malloc.c
$(CC) -fno-optimize-sibling-calls -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjninativestacks.$(SOEXT) test/native/libs/jninativestacks.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjninativelocks.$(SOEXT) test/native/libs/jninativelocks.c -lpthread
ifeq ($(OS_TAG),linux)
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjnimalloc.$(SOEXT) test/native/libs/jnimalloc.c
$(CC) -c -shared -fPIC -o $(TEST_LIB_DIR)/vaddrdif.o test/native/libs/vaddrdif.c
$(LD) -N -shared -o $(TEST_LIB_DIR)/libvaddrdif.$(SOEXT) $(TEST_LIB_DIR)/vaddrdif.o -T test/native/libs/vaddrdif.ld
$(AS) -o $(TEST_LIB_DIR)/multiplematching.o test/native/libs/multiplematching.s
$(LD) -shared -o $(TEST_LIB_DIR)/multiplematching.$(SOEXT) $(TEST_LIB_DIR)/multiplematching.o
$(AS) -o $(TEST_LIB_DIR)/twiceatzero.o test/native/libs/twiceatzero.s
$(LD) -shared -o $(TEST_LIB_DIR)/libtwiceatzero.$(SOEXT) $(TEST_LIB_DIR)/twiceatzero.o --section-start=.seg1=0x4000 -z max-page-size=0x1000
endif
@touch $@
build-test-bins:
build/test/build-test-bins: $(TEST_BIN_SOURCES)
@mkdir -p $(TEST_BIN_DIR)
gcc -o $(TEST_BIN_DIR)/malloc_plt_dyn test/test/nativemem/malloc_plt_dyn.c
gcc -o $(TEST_BIN_DIR)/native_api -Isrc test/test/c/native_api.c -ldl
$(CC) -o $(TEST_BIN_DIR)/malloc_plt_dyn test/test/nativemem/malloc_plt_dyn.c
$(CC) -o $(TEST_BIN_DIR)/native_api -Isrc test/test/c/native_api.c -ldl
$(CC) -o $(TEST_BIN_DIR)/native_lock_contention test/test/nativelock/native_lock_contention.c -lpthread
$(CC) -o $(TEST_BIN_DIR)/profile_with_dlopen -Isrc test/test/nativemem/profile_with_dlopen.c -ldl
$(CC) -o $(TEST_BIN_DIR)/preload_malloc -Isrc test/test/nativemem/preload_malloc.c -ldl
$(CC) -o $(TEST_BIN_DIR)/nativemem_known_lib_crash -Isrc test/test/nativemem/nativemem_known_lib_crash.c -ldl
$(CXX) -o $(TEST_BIN_DIR)/non_java_app -std=c++11 $(INCLUDES) $(CPP_TEST_INCLUDES) test/test/nonjava/non_java_app.cpp $(LIBS)
@touch $@
test-cpp: build-test-cpp
echo "Running cpp tests..."
LD_LIBRARY_PATH="$(TEST_LIB_DIR)" build/test/cpptests
LD_LIBRARY_PATH="$(TEST_LIB_DIR)" DYLD_LIBRARY_PATH="$(TEST_LIB_DIR)" build/test/cpptests
test-java: build-test-java
echo "Running tests against $(LIB_PROFILER)"
$(JAVA) "-Djava.library.path=$(TEST_LIB_DIR)" $(TEST_FLAGS) -ea -cp "build/test.jar:build/jar/*:build/lib/*" one.profiler.test.Runner $(TESTS)
$(TEST_JAVA) $(TEST_FLAGS) -ea -cp "build/$(TEST_JAR):build/jar/*:$(TEST_DEPS_DIR)/*:$(TEST_GEN_DIR)/*" one.profiler.test.Runner $(subst $(COMMA), ,$(TESTS))
coverage: override FAT_BINARY=false
coverage: clean-coverage
@@ -236,18 +276,54 @@ coverage: clean-coverage
cd build/test/ && gcovr -r ../.. --html-details --gcov-executable "$(GCOV)" -o coverage/index.html
rm -rf -- -.gc*
test: test-cpp test-java
# unit tests shouldn't run if the user selects an integration test target
ifeq ($(TESTS),)
TEST_CPP := test-cpp
endif
build/$(TEST_JAR): $(TEST_SOURCES) build/$(CONVERTER_JAR)
mkdir -p build/test
$(JAVAC) -source $(JAVA_TARGET) -target $(JAVA_TARGET) -Xlint:-options -cp "build/jar/*:build/converter/*" -d build/test $(TEST_SOURCES)
$(JAR) cf $@ -C build/test .
test: $(TEST_CPP) test-java
native:
mkdir -p native/linux-x64 native/linux-arm64 native/macos
tar xfO async-profiler-$(PROFILER_VERSION)-linux-x64.tar.gz */build/libasyncProfiler.so > native/linux-x64/libasyncProfiler.so
tar xfO async-profiler-$(PROFILER_VERSION)-linux-arm64.tar.gz */build/libasyncProfiler.so > native/linux-arm64/libasyncProfiler.so
unzip -p async-profiler-$(PROFILER_VERSION)-macos.zip */build/libasyncProfiler.dylib > native/macos/libasyncProfiler.dylib
$(TEST_DEPS_DIR):
mkdir -p $@
build/$(TEST_JAR): build/$(API_JAR) $(TEST_SOURCES) build/$(CONVERTER_JAR) $(TEST_DEPS_DIR)
rm -rf build/test/classes
mkdir -p build/test/classes
$(JAVAC) -source $(JAVA_TARGET) -target $(JAVA_TARGET) -Xlint:-options -XDignore.symbol.file \
-implicit:none \
-cp "build/jar/*:$(TEST_DEPS_DIR)/*:$(TEST_GEN_DIR)/*:test/stubs" \
-d build/test/classes \
$(TEST_SOURCES)
$(JAR) cf $@ -C build/test/classes .
update-otlp-classes-jar:
@if [ -z "$(OTEL_PROTO_PATH)" ]; then \
echo "'OTEL_PROTO_PATH' is empty"; \
exit 1; \
fi
rm -rf $(TMP_DIR)/gen/java $(TMP_DIR)/build
mkdir -p $(TMP_DIR)/gen/java $(TMP_DIR)/build $(TEST_GEN_DIR)
cd $(OTEL_PROTO_PATH) && protoc --java_out=$(TMP_DIR)/gen/java $$(find . \
-type f \
-name '*.proto' \
-not \( -name 'logs*.proto' -o -name 'metrics*.proto' -o -name 'trace*.proto' -o -name '*service.proto' \))
$(JAVAC) -source $(JAVA_TARGET) \
-target $(JAVA_TARGET) \
-cp $(TEST_DEPS_DIR)/* \
-d $(TMP_DIR)/build \
-Xlint:-options \
$$(find $(TMP_DIR)/gen/java -name "*.java")
$(JAR) cvf $(TEST_GEN_DIR)/opentelemetry-gen-classes.jar -C $(TMP_DIR)/build .
LINT_SOURCES=`ls -1 src/*.cpp src/*/*.cpp | grep -v rustDemangle.cpp`
CLANG_TIDY_ARGS_EXTRA=
cpp-lint:
clang-tidy $(LINT_SOURCES) $(CLANG_TIDY_ARGS_EXTRA) -- -x c++ $(CXXFLAGS) $(INCLUDES) $(DEFS) $(LIBS)
DIFF_BASE=
cpp-lint-diff:
git diff -U0 $(DIFF_BASE) -- 'src/*.cpp' 'src/**/*.cpp' 'src/*.h' 'src/**/*.h' ':!**/rustDemangle.cpp' | \
clang-tidy-diff.py -p1 $(CLANG_TIDY_ARGS_EXTRA) -- -x c++ $(CXXFLAGS) $(INCLUDES) $(DEFS) $(LIBS)
check-md:
prettier -c README.md "docs/**/*.md"

View File

@@ -23,12 +23,12 @@ to learn about more features.
# Download
### Stable release: [3.0](https://github.com/async-profiler/async-profiler/releases/tag/v3.0)
### Stable release: [4.3](https://github.com/async-profiler/async-profiler/releases/tag/v4.3)
- Linux x64: [async-profiler-3.0-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-x64.tar.gz)
- Linux arm64: [async-profiler-3.0-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-arm64.tar.gz)
- macOS x64/arm64: [async-profiler-3.0-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-macos.zip)
- Profile converters: [converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v3.0/converter.jar)
- Linux x64: [async-profiler-4.3-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-x64.tar.gz)
- Linux arm64: [async-profiler-4.3-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-arm64.tar.gz)
- macOS arm64/x64: [async-profiler-4.3-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-macos.zip)
- Profile converters: [jfr-converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v4.3/jfr-converter.jar)
### Nightly builds

View File

@@ -5,5 +5,5 @@ When we receive such reports,
we will investigate and subsequently address
any potential vulnerabilities as quickly as possible.
If you discover a potential security issue in this project,
please notify our Security Team via [email](mailto:security@profiler.tools).
please notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/).
Please do *not* create a public GitHub issue in this project.

View File

@@ -0,0 +1,3 @@
FROM public.ecr.aws/bellsoft/alpaquita-linux-gcc:15.2-musl
RUN apk add --no-cache liberica21-jdk util-linux-misc curl

3
docker/alpine.Dockerfile Normal file
View File

@@ -0,0 +1,3 @@
FROM public.ecr.aws/docker/library/amazoncorretto:11-alpine-jdk
RUN apk add --no-cache make gcc g++ linux-headers musl-dev util-linux patchelf gcovr bash tar curl

View File

@@ -0,0 +1,37 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2
RUN amazon-linux-extras enable python3.8
RUN yum update -y && yum install -y git make python38 gcc10 gcc10-c++ binutils tar
ARG node_version=20.19.1
ARG node_sha256=babcd5b9e3216510b89305e6774bcdb2905ca98ff60028b67f163eb8296b6665
RUN curl -L --output node.tar.gz https://github.com/nodejs/node/archive/refs/tags/v${node_version}.tar.gz
RUN echo ${node_sha256} node.tar.gz | sha256sum -c
RUN mkdir /node
RUN tar xf node.tar.gz -C /node --strip-components=1
WORKDIR /node
ENV CC=gcc10-cc
ENV CXX=gcc10-c++
RUN ./configure
RUN make -j4 -s > /dev/null
RUN make install
FROM public.ecr.aws/amazonlinux/amazonlinux:2
COPY --from=0 /usr/local/bin/node /usr/local/bin/node
RUN amazon-linux-extras enable python3.8 && \
yum update -y && \
yum install -y gcc-c++ binutils make java-11-amazon-corretto patchelf tar python38 && \
yum clean all && \
rm -rf /var/cache/yum && \
python -m ensurepip && \
python -m pip install gcovr
ENV NODE_JS_LOCATION=/__e/node20
RUN cat <<EOF > /root/setup.sh
#!/bin/sh
mkdir -p "$NODE_JS_LOCATION/bin"
ln --force --symbolic "/usr/local/bin/node" "$NODE_JS_LOCATION/bin/node"
EOF

View File

@@ -0,0 +1,8 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023
RUN yum update -y && \
yum install -y binutils findutils make tar gcc-c++ util-linux && \
yum clean all && \
rm -rf /var/cache/yum && \
python3 -m ensurepip && \
python3 -m pip install gcovr

View File

@@ -0,0 +1,10 @@
# Image for all tasks related to static code analysis in async-profiler
FROM public.ecr.aws/docker/library/amazoncorretto:11-alpine-jdk
ADD --chmod=555 https://raw.githubusercontent.com/llvm/llvm-project/67be4fe3d5fd986a3149de3806bcf2c92320015e/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py /usr/bin/
RUN apk add --no-cache clang-extra-tools linux-headers make python3 git py3-pip bash
# Needed by clang-tidy-diff.py to merge multiple results in one file.
# '--break-system-packages' is needed because Alpine does not like other package managers than 'apk' ('pip' in this case) to install
# software globally, but it's safe to do in this case.
RUN pip install --break-system-packages pyyaml
ENV CPLUS_INCLUDE_PATH="/usr/lib/jvm/java-11-amazon-corretto/include:/usr/lib/jvm/java-11-amazon-corretto/include/linux"

View File

@@ -2,15 +2,17 @@
async-profiler provides `jfrconv` utility to convert between different profile output formats.
`jfrconv` can be found at the same location as the `asprof` binary. Converter is also available
as a standalone Java application: [`jfr-converter.jar`](https://github.com/async-profiler/async-profiler/releases/download/v3.0/converter.jar).
as a standalone Java application: [`jfr-converter.jar`](https://github.com/async-profiler/async-profiler/releases/latest/download/jfr-converter.jar).
## Supported conversions
| Source | html | collapsed | pprof | pb.gz | heatmap |
| --------- | ---- | --------- | ----- | ----- | ------- |
| jfr | ✅ | ✅ | ✅ | ✅ | ✅ |
| html | ✅ | ✅ | ❌ | ❌ | ❌ |
| collapsed | ✅ | ✅ | ❌ | | |
The tool can convert several source formats into various outputs. The conversion capabilities are summarized below:
| Source format | to html | to collapsed | to pprof | to pb.gz | to heatmap | to otlp |
| ------------- | ------- | ------------ | -------- | -------- | ---------- | ------- |
| jfr | ✅ | ✅ | ✅ | | ✅ | ✅ |
| html | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| collapsed | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
## Usage
@@ -29,27 +31,35 @@ Conversion options:
list of frames followed by a counter. This is used by the FlameGraph script to
generate the FlameGraph visualization of the profile data.
# flamegraph: FlameGraph is a hierarchical representation of call traces of the profiled
software in a color coded format that helps to identify a particular resource
usage like CPU and memory for the application.
# html: FlameGraph is a hierarchical representation of call traces of the profiled
software in a color coded format that helps to identify a particular resource
usage like CPU and memory for the application.
# pprof: pprof is a profiling visualization and analysis tool from Google. More details on
pprof on the official github page https://github.com/google/pprof.
pprof on the official github page https://github.com/google/pprof.
# pb.gz: This is a compressed version of pprof output.
# heatmap: A single page interactive heatmap that allows to explore profiling events
on a timeline.
# otlp: OpenTelemetry profile format.
Differential Flame Graph:
--diff <base-profile> <new-profile>
JFR options:
--cpu Generate only CPU profile during conversion
--cpu-time Generate only CPU profile, using CPUTimeSample events
--wall Generate only Wall clock profile during conversion
--alloc Generate only Allocation profile during conversion
--live Build allocation profile from live objects only during conversion
--nativemem Generate native memory allocation profile
--leak Only include memory leaks in nativemem
--lock Generate only Lock contention profile during conversion
--tail RATIO Ignore tail allocations for leak profiling (10% by default)
--lock Generate only lock contention profile during conversion
--nativelock Generate only native (pthread) lock contention profile
--trace Convert only MethodTrace events
-t --threads Split stack traces by threads
-s --state LIST Filter thread states: runnable, sleeping, default. State name is case insensitive
and can be abbreviated, e.g. -s r
@@ -69,6 +79,7 @@ JFR options:
# an absolute time in hh:mm:ss or yyyy-MM-dd'T'hh:mm:ss format;
# a relative time from the beginning of recording;
# a relative time from the end of recording (a negative number).
--latency MS Retain only samples within MethodTraces of at least MS milliseconds
Flame Graph options:
--title STRING Convert to Flame Graph with provided title
@@ -108,12 +119,12 @@ during a conversion.
jfrconv --cpu foo.jfr
# which is equivalent to:
# jfrconv --cpu -o flamegraph foo.jfr foo.html
# jfrconv --cpu -o html foo.jfr foo.html
```
for HTML output as HTML is the default format for conversion from JFR.
#### Flame Graph options
### Flame Graph options
To add a custom title to the generated Flame Graph, use `--title`, which has the default value `Flame Graph`:
@@ -121,9 +132,37 @@ To add a custom title to the generated Flame Graph, use `--title`, which has the
jfrconv --cpu foo.jfr foo.html -r --title "Custom Title"
```
### Other formats
### Differential Flame Graph
`jfrconv` supports converting a JFR file to `collapsed`, `pprof`, `pb.gz` and `heatmap` formats as well.
To find performance regressions, it may be useful to compare the current profile
to a previous one that serves as a baseline. Differential Flame Graph
visualizes such a comparison with a special color scheme:
- Red color denotes frames with more samples compared to the baseline (i.e. regression);
- Blue is for frames with fewer samples;
- Yellow are new frames that were absent in the baseline.
The more intense the color, the larger the delta.
For each different frame, the delta value is displayed in a tooltip.
![](/.assets/images/flamegraph_diff.png)
Differential Flame Graph takes the shape of the current profile:
all frames have exactly the same size as in the normal Flame Graph.
This means that frames existing only in the base profile will not be visible.
To see such frames, create another differential Flame Graph,
swapping the base and the current input file.
To create a differential Flame Graph, run `jfrconv --diff` with two input files:
a baseline profile and a new profile. Both files can be in JFR, HTML, or collapsed format.
Other converter options work as usual.
```
jfrconv --cpu --diff baseline.jfr new.jfr diff.html
```
Output file name is optional. If omitted, `jfrconv` takes the name
of the second input file, replacing its extension with `.diff.html`.
## Standalone converter examples

View File

@@ -10,6 +10,10 @@ process requires setting two kernel parameters. You can set them using sysctl as
# sysctl kernel.kptr_restrict=0
```
For better profiling accuracy, it is [recommended](Troubleshooting.md#known-limitations)
to start the JVM with `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` flags,
unless async-profiler is loaded at JVM startup.
## Find a process to profile
Common ways to find the target process include using

View File

@@ -79,10 +79,10 @@ retain stacks with matching frames only. All other stacks will be filtered out.
![](/.assets/images/heatmap6.png)
### Producing heatmaps
## Producing heatmaps
Use [`jfrconv`](ConverterUsage.md) tool with `-o heatmap` option
to generate heatmap from a recording in JFR format.
Heatmaps can only be generated from recordings in JFR format.
Run [`jfrconv`](ConverterUsage.md) tool with `-o heatmap` option.
Standard `jfrconv` options (`--cpu`, `--alloc`, `--from`/`--to`, `--simple`, etc.)
are also applicable to heatmaps.

View File

@@ -9,10 +9,17 @@ it is possible to attach async-profiler as an agent on the command line. For exa
$ java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,file=profile.html ...
```
On macOS, the library name is `libasyncProfiler.dylib` instead of `libasyncProfiler.so`.
Agent library is configured through the JVMTI argument interface.
The format of the arguments string is described
[in the source code](https://github.com/async-profiler/async-profiler/blob/v3.0/src/arguments.cpp#L44).
`asprof` actually converts command line arguments to that format.
The argument string is a comma-separated list of [profiler options](ProfilerOptions.md):
```
option[=value],option[=value]...
```
`asprof` internally converts command line arguments to the above format and attaches
`libasyncProfiler.so` agent to a running process.
Another important use of attaching async-profiler as an agent is for continuous profiling.

View File

@@ -58,3 +58,6 @@ async-profiler currently supports the following output formats:
about the JVM as well as the Java application running on it. async-profiler can generate output in `jfr` format
compatible with tools capable of viewing and analyzing `jfr` files. JDK Mission Control (JMC) and Intellij IDEA are
some of many options to visualize `jfr` files. More details [here](JfrVisualization.md).
- `otlp` - OpenTelemetry protocol format for [profiling data](https://opentelemetry.io/blog/2024/profiling).
Experimental feature: backward-incompatible changes may happen in future releases of async-profiler.

View File

@@ -18,55 +18,64 @@ The below options are `action`s for async-profiler and common for both `asprof`
| `resume` | Start or resume earlier profiling session that has been stopped. All the collected data remains valid. The profiling options are not preserved between sessions, and should be specified again. |
| `stop` | Stop profiling and print the report. |
| `dump` | Dump collected data without stopping profiling session. |
| `check` | Check if the specified profiling event is available. |
| `status` | Print profiling status: whether profiler is active and for how long. |
| `meminfo` | Print used memory statistics. |
| `metrics` | Print profiler metrics in Prometheus format. |
| `list` | Show the list of profiling events available for the target process specified with PID. |
## Options applicable to any output format
## General options
| asprof | Launch as agent | Description |
| ------------------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>` |
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Profiling Modes](ProfilingModes.md) for additional information. |
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time in nanoseconds. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events. Time intervals may be followed by `s` for seconds, `ms` for milliseconds, `us` for microseconds or `ns` for nanoseconds.<br>Example: `asprof -e cpu -i 5ms 8983` |
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
| `--live` | `live` | Retain allocation samples with live objects only (object that have not been collected by the end of profiling session). Useful for finding Java heap memory leaks. |
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified is the interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
| `--nofree` | `nofree` | Will not record free calls in native memory allocation profiling. This is relevant when tracking memory leaks is not important and there are lots of free calls. |
| `--lock DURATION` | `lock=DURATION` | In lock profiling mode, sample contended locks when total lock duration overflows the threshold. |
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983` |
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
| `-X PATTERN` | `exclude=PATTERN` | Filter stack traces by the given pattern(s). `-X` defines the name pattern that _must not_ occur in any of stack traces in the output. `-X` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -X '*Unsafe.park*' 8983` |
| `-L level` | `loglevel=level` | Log level: `debug`, `info`, `warn`, `error` or `none`. |
| `-F features` | `features=LIST` | Comma separated (or `+` separated when launching as an agent) list of stack walking features. Supported features are:<ul><li>`stats` - log stack walking performance stats.</li><li>`vtable` - display targets of megamorphic virtual calls as an extra frame on top of `vtable stub` or `itable stub`.</li><li>`comptask` - display current compilation task (a Java method being compiled) in a JIT compiler stack trace.</li><li>`pcaddr` - display instruction addresses .</li></ul>More details [here](AdvancedStacktraceFeatures.md). |
| `-f FILENAME` | `file` | The file name to dump the profile information to.<br>`%p` in the file name is expanded to the PID of the target JVM;<br>`%t` - to the timestamp;<br>`%n{MAX}` - to the sequence number;<br>`%{ENV}` - to the value of the given environment variable.<br>Example: `asprof -o collapsed -f /tmp/traces-%t.txt 8983` |
| `--loop TIME` | `loop=TIME` | Run profiler in a loop (continuous profiling). The argument is either a clock time (`hh:mm:ss`) or a loop duration in `s`econds, `m`inutes, `h`ours, or `d`ays. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration.<br>Example: `asprof --loop 1h -f /var/log/profile-%t.jfr 8983` |
| `--all-user` | `alluser` | Include only user-mode events. This option is helpful when kernel profiling is restricted by `perf_event_paranoid` settings. |
| `--sched` | `sched` | Group threads by Linux-specific scheduling policy: BATCH/IDLE/OTHER. |
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `lbr` (Last Branch Record, available on Haswell since Linux 4.1), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
| `--signal NUM` | `signal=NUM` | Use alternative signal for cpu or wall clock profiling. To change both signals, specify two numbers separated by a slash: `--signal SIGCPU/SIGWALL`. |
| `--clock SOURCE` | `clock=SOURCE` | Clock source for JFR timestamps: `tsc` (default) or `monotonic` (equivalent for `CLOCK_MONOTONIC`). |
| `--begin function` | `begin=FUNCTION` | Automatically start profiling when the specified native function is executed. |
| `--end function` | `end=FUNCTION` | Automatically stop profiling when the specified native function is executed. |
| `--ttsp` | `ttsp` | Time-to-safepoint profiling. An alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`.<br>It is not a separate event type, but rather a constraint. Whatever event type you choose (e.g. `cpu` or `wall`), the profiler will work as usual, except that only events between the safepoint request and the start of the VM operation will be recorded. |
| `--nostop` | `nostop` | Record profiling window between `--begin` and `--end`, but do not stop profiling outside window. |
| `--libpath PATH` | `libpath=PATH` | Full path to `libasyncProfiler.so` (useful when profiling a container from the host). |
| `--filter FILTER` | `filter=FILTER` | In the wall-clock profiling mode, profile only threads with the specified ids.<br>Example: `asprof -e wall -d 30 --filter 120-127,132,134 Computey` |
| `--fdtransfer` | `fdtransfer` | Run a background process that provides access to perf_events to an unprivileged process. `--fdtransfer` is useful for profiling a process in a container (which lacks access to perf_events) from the host.<br>See [Profiling Java in a container](ProfilingInContainer.md). |
| `--target-cpu` | `target-cpu` | In perf_events profiling mode, instruct the profiler to only sample threads running on the specified CPU, defaults to -1.<br>Example: `asprof --target-cpu 3`. |
| `-v --version` | `version` | Prints the version of profiler library. If PID is specified, gets the version of the library loaded into the given process. |
| asprof | Launch as agent | Description |
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
| `-f FILENAME` | `file=FILENAME` | The file name to dump the profile information to.<br>`%p` in the file name is expanded to the PID of the target JVM;<br>`%t` - to the timestamp;<br>`%n{MAX}` - to the sequence number;<br>`%{ENV}` - to the value of the given environment variable.<br>Example: `asprof -o collapsed -f /tmp/traces-%t.txt 8983` |
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>` |
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
| `--loop TIME` | `loop=TIME` | Run profiler in a loop (continuous profiling). The argument is either a clock time (`hh:mm:ss`) or a loop duration in `s`econds, `m`inutes, `h`ours, or `d`ays. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration.<br>Example: `asprof --loop 1h -f /var/log/profile-%t.jfr 8983` |
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Profiling Modes](ProfilingModes.md) for additional information. |
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time in nanoseconds. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events. Time intervals may be followed by `s` for seconds, `ms` for milliseconds, `us` for microseconds or `ns` for nanoseconds.<br>Example: `asprof -e cpu -i 5ms 8983` |
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
| `--tlab` | `tlab` | Use TLAB events for allocation profiling |
| `--live` | `live` | Retain allocation samples with live objects only (objects that have not been collected by the end of the profiling session). Useful for finding Java heap memory leaks. |
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified, is the interval in bytes, or in other units if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
| `--nofree` | `nofree` | Will not record free calls in native memory allocation profiling. This is relevant when tracking memory leaks is not important and there are lots of free calls. |
| `--trace METHOD[:T]` | `trace=METHOD[:T]` | Java method to be traced, optionally followed by a latency threshold.<br>Example: `--trace my.pkg.Class.Method:50ms`.<br>Latency threshold defaults to 0 (all calls are profiled). Can be used multiple times. |
| `--lock TIME` | `lock=TIME` | In lock profiling mode, sample contended locks whenever total lock wait time overflows the specified threshold. |
| `--nativelock TIME` | `nativelock=TIME` | In native lock profiling mode, sample contended pthread locks (mutex/rwlock) whenever total lock wait time overflows the specified threshold. |
| `--wall INTERVAL` | `wall=INTERVAL` | Wall clock profiling interval. Use this option instead of `-e wall` to enable wall clock profiling with another event, typically `cpu`.<br>Example: `asprof -e cpu --wall 100ms -f combined.jfr 8983`. |
| `--nobatch` | `nobatch` | Disable wall clock profiling optimization. Async-profiler will emit one `jdk.ExecutionSample` event for each wall clock sample instead of batching them in a custom `profiler.WallClockSample` event. |
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983`<br>The argument may include two numbers separated by `/` (e.g. `200/40`). In this case, stack traces deeper than 200 frames will be truncated to the top 40 frames. This can be useful to prevent a deep recursion from bloating the profile. |
| `-F features` | `features=LIST` | Comma separated (or `+` separated when launching as an agent) list of stack walking features. Supported features are:<ul><li>`stats` - log stack walking performance stats.</li><li>`vtable` - display targets of megamorphic virtual calls as an extra frame on top of `vtable stub` or `itable stub`.</li><li>`comptask` - display current compilation task (a Java method being compiled) in a JIT compiler stack trace.</li><li>`pcaddr` - display instruction addresses.</li></ul>More details [here](AdvancedStacktraceFeatures.md). |
| `-L level` | `loglevel=level` | Log level: `debug`, `info`, `warn`, `error` or `none`. |
| N/A | `log=FILENAME` | Dedicated file for log messages. Used internally by asprof. |
| N/A | `quiet` | Do not log "Profiling started/stopped" message. Used internally by asprof. |
| N/A | `server=ADDRESS` | Start insecure HTTP server with the given IP address/port to control the profiler. This option can be specified as `-agentpath` argument only. Be careful not to expose async-profiler server in a public network. |
| `--all-user` | `alluser` | Include only user-mode events. This option is helpful when kernel profiling is restricted by `perf_event_paranoid` settings. |
| `--sched` | `sched` | Group threads by Linux-specific scheduling policy: BATCH/IDLE/OTHER. |
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
| `--signal NUM` | `signal=NUM` | Use alternative signal for cpu or wall clock profiling. To change both signals, specify two numbers separated by a slash: `--signal SIGCPU/SIGWALL`. |
| `--clock SOURCE` | `clock=SOURCE` | Clock source for JFR timestamps: `tsc` (default) or `monotonic` (equivalent for `CLOCK_MONOTONIC`). |
| `--begin function` | `begin=FUNCTION` | Automatically start profiling when the specified native function is executed. |
| `--end function` | `end=FUNCTION` | Automatically stop profiling when the specified native function is executed. |
| `--ttsp` | `ttsp` | Time-to-safepoint profiling. An alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`.<br>It is not a separate event type, but rather a constraint. Whatever event type you choose (e.g. `cpu` or `wall`), the profiler will work as usual, except that only events between the safepoint request and the start of the VM operation will be recorded. |
| `--nostop` | `nostop` | Record profiling window between `--begin` and `--end`, but do not stop profiling outside window. |
| `--memlimit SIZE` | `memlimit=SIZE` | Limit memory used by the call trace storage. Once the limit is exceeded, no new stack traces will be recorded. The lowest possible limit is 10 MB; the default is unlimited.<br>Example: `asprof -e cpu --memlimit 128m` |
| `--libpath PATH` | N/A | Full path to `libasyncProfiler.so` (useful when profiling a container from the host). |
| `--filter FILTER` | `filter=FILTER` | In the wall-clock profiling mode, profile only threads with the specified ids.<br>Example: `asprof -e wall -d 30 --filter 120-127,132,134 Computey` |
| `--fdtransfer` | `fdtransfer` | Run a background process that provides access to perf_events to an unprivileged process. `--fdtransfer` is useful for profiling a process in a container (which lacks access to perf_events) from the host.<br>See [Profiling Java in a container](ProfilingInContainer.md). |
| `--target-cpu` | `target-cpu` | In perf_events profiling mode, instruct the profiler to only sample threads running on the specified CPU, defaults to -1.<br>Example: `asprof --target-cpu 3`. |
| `--record-cpu` | `record-cpu` | In perf_events profiling mode, instruct the profiler to capture which CPU a sample was taken on. |
| `-v --version` | `version` | Prints the version of profiler library. If PID is specified, gets the version of the library loaded into the given process. |
## Options applicable to JFR output only
| asprof | Launch as agent | Description |
| ------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--chunksize N` | `chunksize=N` | Approximate size for a single JFR chunk. A new chunk will be started whenever specified size is reached. The default `chunksize` is 100MB.<br>Example: `asprof -f profile.jfr --chunksize 100m 8983` |
| `--chunktime N` | `chunktime=N` | Approximate time limit for a single JFR chunk. A new chunk will be started whenever specified time limit is reached. The default `chunktime` is 1 hour.<br>Example: `asprof -f profile.jfr --chunktime 1h 8983` |
| `--jfropts OPTIONS` | `jfropts=OPTIONS` | Comma separated list of JFR recording options. Currently, the only available option is `mem` supported on Linux 3.17+. `mem` enables accumulating events in memory instead of flushing synchronously to a file. |
| `--jfrsync CONFIG` | `jfrsync[=CONFIG]` | Start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies `-o jfr`.<br>`CONFIG` is a predefined JFR profile or a JFR configuration file (.jfc) or a list of JFR events started with `+`.<br><br>Example: `asprof -e cpu --jfrsync profile -f combined.jfr 8983` |
| asprof | Launch as agent | Description |
| ------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--chunksize N` | `chunksize=N` | Approximate size for a single JFR chunk. A new chunk will be started whenever specified size is reached. The default `chunksize` is 100MB.<br>Example: `asprof -f profile.jfr --chunksize 100m 8983` |
| `--chunktime N` | `chunktime=N` | Approximate time limit for a single JFR chunk. A new chunk will be started whenever specified time limit is reached. The default `chunktime` is 1 hour.<br>Example: `asprof -f profile.jfr --chunktime 1h 8983` |
| `--jfropts OPTIONS` | `jfropts=OPTIONS` | Comma separated list of JFR recording options. Currently, the only available option is `mem` supported on Linux 3.17+. `mem` enables accumulating events in memory instead of flushing synchronously to a file. |
| `--jfrsync CONFIG` | `jfrsync[=CONFIG]` | Start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies `-o jfr`.<br>`CONFIG` is a predefined JFR profile or a JFR configuration file (.jfc) or a list of JFR events started with `+`.<br>Example: `asprof -e cpu --jfrsync profile -f combined.jfr 8983` |
| `--proc INTERVAL` | `proc=INTERVAL` | Collect statistics about other processes in the system. Default sampling interval is 30s. |
| `--all` | `all` | Shorthand for enabling `cpu`, `wall`, `alloc`, `live`, `lock`, `nativelock`, `nativemem`, and `proc` profiling simultaneously. This can be combined with `--alloc 2m --lock 10ms` etc. to pass custom interval/threshold. It is also possible to combine it with `-e` argument to change the type of event being collected (default is `cpu`). This is not recommended for production, especially for continuous profiling. |
## Options applicable to FlameGraph and Tree view outputs only
@@ -83,15 +92,20 @@ By default, async-profiler merges stack traces starting from the outermost (e.g.
## Options applicable to any output format except JFR
| asprof | Launch as agent | Description |
| -------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `-t --threads` | `threads` | Profile threads separately. Each stack trace will end with a frame that denotes a single thread.<br>Example: `asprof -t 8983` |
| `-s --simple` | `simple` | Print simple class names instead of fully qualified names. |
| `-n --norm` | `norm` | Normalize names of hidden classes / lambdas. |
| `-g --sig` | `sig` | Print method signatures. |
| `-l --lib` | `lib` | Prepend library names to symbols, e.g. ``libjvm.so`JVM_DefineClassWithSource``. |
| `--total` | `total` | Count the total value of the collected metric instead of the number of samples, e.g. total allocation size. |
| `-a --ann` | `ann` | Annotate JIT compiled methods with `_[j]`, inlined methods with `_[i]`, interpreted methods with `_[0]` and C1 compiled methods with `_[1]`. FlameGraph and Tree view will color frames depending on their type regardless of this option. |
| asprof | Launch as agent | Description |
| -------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `-t --threads` | `threads` | Profile threads separately. Each stack trace will end with a frame that denotes a single thread.<br>Example: `asprof -t 8983` |
| `-s --simple` | `simple` | Print simple class names instead of fully qualified names. |
| `-n --norm` | `norm` | Normalize names of hidden classes / lambdas. |
| `-g --sig` | `sig` | Print method signatures. |
| `-a --ann` | `ann` | Annotate JIT compiled methods with `_[j]`, inlined methods with `_[i]`, interpreted methods with `_[0]` and C1 compiled methods with `_[1]`. FlameGraph and Tree view will color frames depending on their type regardless of this option. |
| `-l --lib` | `lib` | Prepend library names to symbols, e.g. ``libjvm.so`JVM_DefineClassWithSource``. |
| `--dot` | `dot` | Dotted class names, e.g. `java.lang.String` instead of `java/lang/String`. |
| `--samples` | `samples` | Count the number of samples. This is the default aggregation option. |
| `--total` | `total` | Count the total value of the collected metric instead of the number of samples, e.g. total allocation size. |
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
| `-X PATTERN` | `exclude=PATTERN` | Filter stack traces by the given pattern(s). `-X` defines the name pattern that _must not_ occur in any of stack traces in the output. `-X` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -X '*Unsafe.park*' 8983` |
| N/A | `mcache[=AGE]` | Maximum age of the method name cache. Default is `0` (do not cache method names between profiling sessions). |
## Appendix
@@ -111,5 +125,6 @@ By default, async-profiler merges stack traces starting from the outermost (e.g.
- `flamegraph` - produce Flame Graph in HTML format.
- `tree` - produce Call Tree in HTML format.
- `--reverse` option will generate backtrace view.
- `otlp` - dump events in OpenTelemetry format.
It is possible to specify multiple dump options at the same time.

View File

@@ -117,10 +117,19 @@ jfrconv --total --nativemem --leak app.jfr app-leak.html
jfrconv --total --nativemem app.jfr app-malloc.html
```
When `--leak` option is used, the generated flame graph will show allocations without matching `free` calls. If `-nofree` is specified, every allocation will be reported as a leak:
When `--leak` option is used, the generated flame graph will show allocations without matching `free` calls.
![nativemem flamegraph](../.assets/images/nativemem_flamegraph.png)
To avoid bias towards youngest allocations not freed by the end of the profiling session,
leak profiler ignores tail allocations made in the last 10% of the profiling period.
Tail length can be altered with `--tail` option that accepts `ratio` or `percent%` as an argument.
For example, to ignore allocations in the last 2 minutes of a 10-minute profile, use:
```
jfrconv --nativemem --leak --tail 20% app.jfr app-leak.html
```
The overhead of `nativemem` profiling depends on the number of native allocations,
but is usually small enough even for production use. If required, the overhead can be reduced
by configuring the profiling interval. E.g. if you add `nativemem=1m` profiler option,
@@ -136,7 +145,7 @@ Run an application with `nativemem` profiler that dumps recordings in JFR format
LD_PRELOAD=/path/to/libasyncProfiler.so ASPROF_COMMAND=start,nativemem,total,loop=10m,cstack=dwarf,file=profile-%t.jfr NativeApp [args]
```
Then run `jfrconv` to generate memory leak reports as flame graphs:
Then run `jfrconv` to generate memory leak report as a flame graph:
```
jfrconv --total --nativemem --leak <profile>.jfr <profile>-leak.html
@@ -163,6 +172,30 @@ enter this lock/monitor.
Example: `asprof -e lock -t -i 5ms -f result.html 8983`
## Native lock profiling
`--nativelock` option tells async-profiler to measure pthread lock contention in the profiled application.
Native lock profiling can help developers understand pthread lock acquisition patterns, lock contention (when threads
have to wait to acquire native locks), time spent waiting for pthread mutexes and read-write locks, and which code paths
are blocked due to native synchronization primitives.
Native lock profiling works by intercepting calls to:
- [`pthread_mutex_lock`](https://man7.org/linux/man-pages/man3/pthread_mutex_lock.3p.html)
- [`pthread_rwlock_rdlock`](https://man7.org/linux/man-pages/man3/pthread_rwlock_rdlock.3p.html)
- [`pthread_rwlock_wrlock`](https://man7.org/linux/man-pages/man3/pthread_rwlock_wrlock.3p.html)
In this mode, the top frame shows the native function that experienced contention (e.g., `pthread_mutex_lock_hook`),
and the counter represents the number of nanoseconds threads spent waiting to acquire the lock.
Key differences from Java lock profiling:
- Profiles native pthread locks instead of Java monitors.
- Works with C/C++ applications and native libraries used by Java applications.
- Captures contention in native code paths that Java lock profiling cannot see.
Example: `asprof --nativelock 5ms -t -f result.html 8983`
## Java method profiling
`-e ClassName.methodName` option instruments the given Java method
@@ -180,9 +213,14 @@ of all compiled methods. The subsequent instrumentation flushes only the _depend
The massive CodeCache flush doesn't occur if attaching async-profiler as an agent.
### Java native method profiling
### Latency profiling
Here are some useful native methods to profile:
Please refer to our blog post on [latency profiling](https://github.com/async-profiler/async-profiler/discussions/1497)
to learn more about this profiling mode.
## Native function profiling
Here are some useful native functions to profile:
- `G1CollectedHeap::humongous_obj_allocate` - trace _humongous allocations_ of the G1 GC,
- `JVM_StartThread` - trace creation of new Java threads,
@@ -221,6 +259,40 @@ The same, when starting profiler as an agent:
-agentpath:/path/to/libasyncProfiler.so=start,event=cpu,alloc=2m,lock=10ms,file=profile.jfr
```
### Multi-event profiling using `--all`
The `--all` flag offers a way to simultaneously enable a predefined collection of common profiling events. By default, `--all` activates profiling for `cpu`, `wall`, `alloc`, `live`, `lock` and `nativemem`.
**Important consideration**
While the `--all` flag can be useful for development environments to get a wide overview, it is not recommended to enable this in production, especially for continuous profiling. Users are invited to select carefully what to profile and with what settings.
**Sample command:**
This command enables the default set of events included in `--all`:
```
asprof --all -f profile.jfr
```
or combine it with `--alloc`/`--wall`/`--lock`/`--nativemem` options to override individual settings. For example:
```
asprof --all --alloc 2m --lock 10ms -f profile.jfr
```
The same, when starting profiler as an agent:
```
-agentpath:/path/to/libasyncProfiler.so=start,all,alloc=2m,lock=10ms,file=profile.jfr
```
Instead of `cpu`, it is possible to override the `--all` parameter with any other event type of your choice. For instance, the following command will profile `cycles` along with `wall`, `alloc`, `live`, `lock` and `nativemem`:
```
asprof --all -e cycles -f profile.jfr
```
## Continuous profiling
Continuous profiling is a means by which an application can be profiled
@@ -247,6 +319,7 @@ asprof --loop 1h -f /var/log/profile-%t.jfr 8983
| `-e page-faults` | Software page faults |
| `-e context-switches` | Context switches |
| `-e cycles` | Total CPU cycles |
| `-e ref-cycles` | CPU reference cycles, not affected by CPU frequency scaling |
| `-e instructions` | Retired CPU instructions |
| `-e cache-references` | Cache accesses (usually Last Level Cache, but may depend on the architecture) |
| `-e cache-misses` | Cache accesses requiring fetching data from a higher-level cache or main memory |

View File

@@ -25,25 +25,9 @@ Similar to the
[Java API](IntegratingAsyncProfiler.md#using-java-api),
there is a C API for using profiler inside a native application.
```
typedef const char* asprof_error_t;
typedef void (*asprof_writer_t)(const char* buf, size_t size);
Header file for the API is bundled in the async-profiler release package under [`include/asprof.h`](../src/asprof.h).
// Should be called once prior to any other API functions
DLLEXPORT void asprof_init();
typedef void (*asprof_init_t)();
// Returns an error message for the given error code or NULL if there is no error
DLLEXPORT const char* asprof_error_str(asprof_error_t err);
typedef const char* (*asprof_error_str_t)(asprof_error_t err);
// Executes async-profiler command using output_callback as an optional sink
// for the profiler output. Returns an error code or NULL on success.
DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t output_callback);
typedef asprof_error_t (*asprof_execute_t)(const char* command, asprof_writer_t output_callback);
```
To use it in a C/C++ application, include `asprof.h`. Below is an example showing how to invoke async-profiler with the API:
To use it in a C/C++ application, include the mentioned `asprof.h`. Below is an example showing how to invoke async-profiler with the API:
```
#include "asprof.h"

View File

@@ -2,12 +2,15 @@
## Frame Pointer
The default stack walking in async-profiler, `Frame Pointer (FP)` stack walking, is a technique for collecting call
stacks by tracking frame pointers in memory. Each function call maintains a pointer to its caller's stack frame, creating
a linked chain that can be traversed to reconstruct the program's execution path. It's particularly efficient as it is
very fast compared to other stack walking methods introducing less overhead but requires code to be compiled with frame
`Frame Pointer (FP)` stack walking is a technique for collecting call stacks by tracking frame pointers in memory.
Each function call maintains a pointer to its caller's stack frame, creating a linked chain that can be traversed
to reconstruct the program's execution path. It is very fast and introduces less overhead than other
stack walking methods, but it requires code to be compiled with frame
pointers enabled (`-fno-omit-frame-pointer`).
Before async-profiler 4.2, Frame Pointer was the default stack walking mode.
Since version 4.2, the default was changed to [VM Structs](#vm-structs).
## DWARF
DWARF stack walking is a method to reconstruct call stacks using unwinding information embedded in executables
@@ -20,18 +23,6 @@ due to being signal safe in async-profiler.
The feature can be enabled with the option `--cstack dwarf` (or its agent equivalent `cstack=dwarf`).
## LBR
Modern Intel CPUs can profile branch instructions, including `call`s and `ret`s, and store their source and destination
addresses (Last Branch Records) in hardware registers. Starting from Haswell, CPU can match these addresses to form a
branch stack. This branch stack will be effectively a call chain automatically collected by the hardware.
LBR stacks are not always complete or accurate, but they still appear much more helpful compared to FP-based stack
walking, when a native library is compiled with omitted frame pointers. It works only with hardware events like
`-e cycles` (`instructions`, `cache-misses` etc.) and the maximum call chain depth is 32 (hardware limit).
The feature can be enabled with the option `--cstack lbr` (or its agent equivalent `cstack=lbr`).
## VM Structs
async-profiler can leverage JVM internal structures to replicate the logic of Java stack walking
@@ -57,6 +48,7 @@ Due to issues with AGCT from time to time, including random crashes and missing
- Provides additional information on each frame, like JIT compilation type.
The feature can be enabled with the option `--cstack vm` (or its agent equivalent `cstack=vm`).
Since async-profiler 4.2, this is the default mode when running on the HotSpot JVM.
Another variant of this option: `--cstack vmx` activates an "expert" unwinding based on VM Structs.
With this option, async-profiler collects mixed stack traces that have Java and native frames interleaved.

View File

@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>tools.profiler</groupId>
<artifactId>jfr-converter</artifactId>
<version>4.0</version>
<version>4.4</version>
<packaging>jar</packaging>
<name>async-profiler</name>
@@ -57,7 +57,7 @@
<configuration>
<archive>
<manifest>
<mainClass>Main</mainClass>
<mainClass>one.convert.Main</mainClass>
</manifest>
</archive>
</configuration>
@@ -102,17 +102,15 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>central</publishingServerId>
</configuration>
</plugin>
</plugins>
</build>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2</url>
</repository>
</distributionManagement>
</project>

60
pom.xml
View File

@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>tools.profiler</groupId>
<artifactId>async-profiler</artifactId>
<version>4.0</version>
<version>4.4</version>
<packaging>jar</packaging>
<name>async-profiler</name>
@@ -56,19 +56,53 @@
<version>3.3.0</version>
<executions>
<execution>
<id>linux-x64-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>${native.platform}</classifier>
<classifier>linux-x64</classifier>
<includes>
<include>${native.platform}/*</include>
<include>linux-x64/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
<execution>
<id>linux-arm64-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>linux-arm64</classifier>
<includes>
<include>linux-arm64/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
<execution>
<id>macos-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>macos</classifier>
<includes>
<include>macos/*</include>
<include>one/**</include>
</includes>
</configuration>
</execution>
</executions>
<configuration>
<archive>
<manifestFile>src/api/one/profiler/MANIFEST.MF</manifestFile>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -116,17 +150,15 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>central</publishingServerId>
</configuration>
</plugin>
</plugins>
</build>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2</url>
</repository>
</distributionManagement>
</project>

View File

@@ -18,6 +18,33 @@ u64 AllocTracer::_interval;
volatile u64 AllocTracer::_allocated_bytes;
// Resolves the JVM allocation tracing functions in libjvm and binds the
// in-new-TLAB and outside-TLAB traps to them. Once both traps have a
// non-zero entry address, subsequent calls return immediately.
// Returns Error::OK on success, or an error when none of the known
// symbol pairs can be found in libjvm.
Error AllocTracer::initialize() {
    // Both traps were resolved by an earlier call
    if (_in_new_tlab.entry() != 0 && _outside_tlab.entry() != 0) {
        return Error::OK;
    }

    // Known symbol name prefixes, probed strictly in this order
    static const struct {
        const char* in_new_tlab;
        const char* outside_tlab;
        int trap_kind;
    } CANDIDATES[] = {
        // JDK 10+
        {"_ZN11AllocTracer27send_allocation_in_new_tlab",
         "_ZN11AllocTracer28send_allocation_outside_tlab",
         1},
        // JDK 8u262+
        {"_ZN11AllocTracer33send_allocation_in_new_tlab_eventE11KlassHandleP8HeapWord",
         "_ZN11AllocTracer34send_allocation_outside_tlab_eventE11KlassHandleP8HeapWord",
         1},
        // JDK 7-9
        {"_ZN11AllocTracer33send_allocation_in_new_tlab_event",
         "_ZN11AllocTracer34send_allocation_outside_tlab_event",
         2},
    };

    CodeCache* libjvm = VMStructs::libjvm();
    const unsigned count = sizeof(CANDIDATES) / sizeof(CANDIDATES[0]);

    for (unsigned i = 0; i < count; i++) {
        // The second symbol is looked up only if the first one was found
        const void* ne = libjvm->findSymbolByPrefix(CANDIDATES[i].in_new_tlab);
        if (ne == NULL) {
            continue;
        }
        const void* oe = libjvm->findSymbolByPrefix(CANDIDATES[i].outside_tlab);
        if (oe == NULL) {
            continue;
        }

        _trap_kind = CANDIDATES[i].trap_kind;
        _in_new_tlab.assign(ne);
        _outside_tlab.assign(oe);
        _in_new_tlab.pair(_outside_tlab);
        return Error::OK;
    }

    return Error("No AllocTracer symbols found. Are JDK debug symbols installed?");
}
// Called whenever our breakpoint trap is hit
void AllocTracer::trapHandler(int signo, siginfo_t* siginfo, void* ucontext) {
StackFrame frame(ucontext);
@@ -69,44 +96,15 @@ void AllocTracer::recordAllocation(void* ucontext, EventType event_type, uintptr
Profiler::instance()->recordSample(ucontext, total_size, event_type, &event);
}
Error AllocTracer::check(Arguments& args) {
if (args._live) {
Error AllocTracer::start(Arguments& args) {
if (args._live && !args._all) {
// This engine is only going to be selected in Profiler::selectAllocEngine
// when can_generate_sampled_object_alloc_events is not available, i.e. JDK<11.
return Error("'live' option is supported on OpenJDK 11+");
}
if (_in_new_tlab.entry() != 0 && _outside_tlab.entry() != 0) {
return Error::OK;
}
CodeCache* libjvm = VMStructs::libjvm();
const void* ne;
const void* oe;
if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer27send_allocation_in_new_tlab")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer28send_allocation_outside_tlab")) != NULL) {
_trap_kind = 1; // JDK 10+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_eventE11KlassHandleP8HeapWord")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_eventE11KlassHandleP8HeapWord")) != NULL) {
_trap_kind = 1; // JDK 8u262+
} else if ((ne = libjvm->findSymbolByPrefix("_ZN11AllocTracer33send_allocation_in_new_tlab_event")) != NULL &&
(oe = libjvm->findSymbolByPrefix("_ZN11AllocTracer34send_allocation_outside_tlab_event")) != NULL) {
_trap_kind = 2; // JDK 7-9
} else {
return Error("No AllocTracer symbols found. Are JDK debug symbols installed?");
}
_in_new_tlab.assign(ne);
_outside_tlab.assign(oe);
_in_new_tlab.pair(_outside_tlab);
return Error::OK;
}
Error AllocTracer::start(Arguments& args) {
Error error = check(args);
if (error) {
return error;
}
Error error = initialize();
if (error) return error;
_interval = args._alloc > 0 ? args._alloc : 0;
_allocated_bytes = 0;

View File

@@ -22,6 +22,7 @@ class AllocTracer : public Engine {
static u64 _interval;
static volatile u64 _allocated_bytes;
static Error initialize();
static void recordAllocation(void* ucontext, EventType event_type, uintptr_t rklass,
uintptr_t total_size, uintptr_t instance_size);
@@ -38,7 +39,6 @@ class AllocTracer : public Engine {
return "bytes";
}
Error check(Arguments& args);
Error start(Arguments& args);
void stop();

View File

@@ -0,0 +1,26 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.profiler;
import javax.management.ObjectName;
import java.lang.management.ManagementFactory;
/**
 * Java agent entry point for async-profiler.
 * <p>
 * Registers the {@link AsyncProfiler} singleton as a platform MBean under
 * {@link AsyncProfilerMXBean#OBJECT_NAME} and then executes the agent
 * arguments, if any, as a profiler command.
 */
public class Agent {

    /**
     * Entry point used when the agent is specified at JVM startup.
     * Behaves exactly like {@link #agentmain(String)}.
     *
     * @param args agent arguments, may be {@code null} or empty
     * @throws Exception if MBean registration or command execution fails
     */
    public static void premain(String args) throws Exception {
        agentmain(args);
    }

    /**
     * Entry point used when the agent is loaded into a running JVM.
     *
     * @param args agent arguments passed to {@code AsyncProfiler.execute};
     *             nothing is executed when {@code null} or empty
     * @throws Exception if MBean registration or command execution fails
     */
    public static void agentmain(String args) throws Exception {
        AsyncProfiler profiler = AsyncProfiler.getInstance();
        try {
            ManagementFactory.getPlatformMBeanServer().registerMBean(
                    profiler,
                    new ObjectName(AsyncProfilerMXBean.OBJECT_NAME));
        } catch (javax.management.InstanceAlreadyExistsException e) {
            // The agent has already been loaded into this JVM and the MBean
            // is registered; this must not prevent the command from running.
        }

        if (args != null && !args.isEmpty()) {
            profiler.execute(args);
        }
    }
}

View File

@@ -39,16 +39,22 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
// No need to load library, if it has been preloaded with -agentpath
profiler.getVersion();
} catch (UnsatisfiedLinkError e) {
File file = extractEmbeddedLib();
if (file != null) {
try {
System.load(file.getPath());
} finally {
file.delete();
}
String libraryPath = System.getProperty("one.profiler.libraryPath");
if (libraryPath != null && !libraryPath.isEmpty()) {
System.load(new File(libraryPath).getAbsolutePath());
} else {
System.loadLibrary("asyncProfiler");
File file = extractEmbeddedLib();
if (file != null) {
try {
System.load(file.getAbsolutePath());
} finally {
file.delete();
}
} else {
System.loadLibrary("asyncProfiler");
}
}
}
}
@@ -171,7 +177,7 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
/**
* Execute an agent-compatible profiling command -
* the comma-separated list of arguments described in arguments.cpp
* the comma-separated list of arguments defined in arguments.cpp
*
* @param command Profiling command
* @return The command result
@@ -195,7 +201,7 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
@Override
public String dumpCollapsed(Counter counter) {
try {
return execute0("collapsed," + counter.name().toLowerCase());
return execute0("collapsed," + (counter == Counter.SAMPLES ? "samples" : "total"));
} catch (IOException e) {
throw new IllegalStateException(e);
}
@@ -231,6 +237,23 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
}
}
/**
 * Dumps the collected profile in OTLP format.
 * <p>
 * This API is UNSTABLE and might change or be removed in the next version of async-profiler.
 *
 * @param counter the counter to aggregate by: {@link Counter#SAMPLES} selects
 *                sample counts, any other value selects totals
 * @return the profile in OTLP representation
 * @throws IllegalStateException if the underlying command fails with an I/O error
 */
@Override
public byte[] dumpOtlp(Counter counter) {
    String counterName = counter == Counter.SAMPLES ? "samples" : "total";
    String command = "otlp," + counterName;
    try {
        return execute1(command);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
* Add the given thread to the set of profiled threads.
* 'filter' option must be enabled to use this method.
@@ -271,5 +294,7 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
private native String execute0(String command) throws IllegalArgumentException, IllegalStateException, IOException;
private native byte[] execute1(String command) throws IllegalArgumentException, IllegalStateException, IOException;
private native void filterThread0(Thread thread, boolean enable);
}

View File

@@ -17,6 +17,8 @@ package one.profiler;
* }</pre>
*/
public interface AsyncProfilerMXBean {
String OBJECT_NAME = "one.profiler:type=AsyncProfiler";
void start(String event, long interval) throws IllegalStateException;
void resume(String event, long interval) throws IllegalStateException;
void stop() throws IllegalStateException;
@@ -29,4 +31,5 @@ public interface AsyncProfilerMXBean {
String dumpCollapsed(Counter counter);
String dumpTraces(int maxTraces);
String dumpFlat(int maxMethods);
byte[] dumpOtlp(Counter counter);
}

View File

@@ -0,0 +1,2 @@
Agent-Class: one.profiler.Agent
Premain-Class: one.profiler.Agent

View File

@@ -15,8 +15,6 @@
# define unlikely(x) (__builtin_expect(!!(x), 0))
#endif
#define callerPC() __builtin_return_address(0)
#ifdef _LP64
# define LP64_ONLY(code) code
#else // !_LP64
@@ -29,27 +27,26 @@ typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
static inline u64 atomicInc(volatile u64& var, u64 increment = 1) {
return __sync_fetch_and_add(&var, increment);
// Atomically adds `increment` to `var` using acquire-release ordering.
// Returns the value `var` held immediately before the addition.
template<typename T>
static inline T atomicInc(T& var, T increment = 1) {
    T* const target = &var;
    return __atomic_fetch_add(target, increment, __ATOMIC_ACQ_REL);
}
static inline int atomicInc(volatile u32& var, int increment = 1) {
return __sync_fetch_and_add(&var, increment);
// Atomically subtracts `decrement` from `var` using acquire-release ordering.
// Returns the value `var` held immediately before the subtraction.
template<typename T>
static inline T atomicDec(T& var, T decrement = 1) {
    T* const target = &var;
    return __atomic_fetch_sub(target, decrement, __ATOMIC_ACQ_REL);
}
static inline int atomicInc(volatile int& var, int increment = 1) {
return __sync_fetch_and_add(&var, increment);
}
static inline u64 loadAcquire(u64& var) {
// Atomically reads `var` with acquire ordering and returns the value read.
template<typename T>
static inline T loadAcquire(T& var) {
    T* const source = &var;
    return __atomic_load_n(source, __ATOMIC_ACQUIRE);
}
static inline void storeRelease(u64& var, u64 value) {
return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
// Atomically writes `value`, converted to the type of `var`, into `var`
// with release ordering.
template<typename T, typename U>
static inline void storeRelease(T& var, U value) {
    T* const target = &var;
    __atomic_store_n(target, static_cast<T>(value), __ATOMIC_RELEASE);
}
#if defined(__x86_64__) || defined(__i386__)
typedef unsigned char instruction_t;
@@ -58,7 +55,6 @@ const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = 2;
const int FRAME_PC_SLOT = 1;
const int PROBE_SP_LIMIT = 4;
const int PLT_HEADER_SIZE = 16;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 8; // PERF_REG_X86_IP
@@ -67,6 +63,7 @@ const int PERF_REG_PC = 8; // PERF_REG_X86_IP
#define rmb() asm volatile("lfence" : : : "memory")
#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory")
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() ((void**)__builtin_frame_address(0) + 2)
@@ -79,7 +76,6 @@ const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PROBE_SP_LIMIT = 0;
const int PLT_HEADER_SIZE = 20;
const int PLT_ENTRY_SIZE = 12;
const int PERF_REG_PC = 15; // PERF_REG_ARM_PC
@@ -88,6 +84,7 @@ const int PERF_REG_PC = 15; // PERF_REG_ARM_PC
#define rmb() asm volatile("dmb ish" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(1)
@@ -99,7 +96,6 @@ const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PROBE_SP_LIMIT = 0;
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC
@@ -108,8 +104,9 @@ const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC
#define rmb() asm volatile("dmb ish" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(1)
#define callerPC() ({ void* pc; asm volatile("adr %0, ." : "=r"(pc)); pc; })
#define callerFP() ({ void* fp; asm volatile("mov %0, fp" : "=r"(fp)); fp; })
#define callerSP() ({ void* sp; asm volatile("mov %0, sp" : "=r"(sp)); sp; })
#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
@@ -121,7 +118,6 @@ const int BREAKPOINT_OFFSET = 8;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 2;
const int PROBE_SP_LIMIT = 0;
const int PLT_HEADER_SIZE = 24;
const int PLT_ENTRY_SIZE = 24;
const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP
@@ -130,6 +126,7 @@ const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP
#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
@@ -145,7 +142,6 @@ const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1; // return address is at -1 from FP
const int PROBE_SP_LIMIT = 0;
const int PLT_HEADER_SIZE = 24; // Best guess from examining readelf
const int PLT_ENTRY_SIZE = 24; // ...same...
const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC
@@ -154,6 +150,7 @@ const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC
#define rmb() asm volatile ("fence" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
@@ -165,7 +162,6 @@ const int BREAKPOINT_OFFSET = 0;
const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int PROBE_SP_LIMIT = 0;
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC
@@ -174,6 +170,7 @@ const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC
#define rmb() asm volatile("dbar 0x0" : : : "memory")
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))
#define callerPC() __builtin_return_address(0)
#define callerFP() __builtin_frame_address(1)
#define callerSP() __builtin_frame_address(0)
@@ -184,18 +181,20 @@ const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC
#endif
// Return address signing support.
// Apple M1 has 47 bit virtual addresses.
// On Apple M1 and later processors, memory is either writable or executable (W^X)
#if defined(__aarch64__) && defined(__APPLE__)
# define ADDRESS_BITS 47
# define WX_MEMORY true
# define WX_MEMORY true
#else
# define WX_MEMORY false
# define WX_MEMORY false
#endif
#ifdef ADDRESS_BITS
// Pointer authentication (PAC) support.
// Only 48-bit virtual addresses are currently supported.
#ifdef __aarch64__
const unsigned long PAC_MASK = WX_MEMORY ? 0x7fffffffffffUL : 0xffffffffffffUL;
static inline const void* stripPointer(const void* p) {
return (const void*) ((unsigned long)p & ((1UL << ADDRESS_BITS) - 1));
return (const void*) ((unsigned long)p & PAC_MASK);
}
#else
# define stripPointer(p) (p)

View File

@@ -11,6 +11,7 @@
#include <sys/types.h>
#include <unistd.h>
#include "arguments.h"
#include "os.h"
// Arguments of the last start/resume command; reused for shutdown and restart
@@ -22,12 +23,6 @@ const Error Error::OK(NULL);
// Extra buffer space for expanding file pattern
const size_t EXTRA_BUF_SIZE = 512;
static const Multiplier NANOS[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {0, 0}};
static const Multiplier BYTES[] = {{'b', 1}, {'k', 1024}, {'m', 1048576}, {'g', 1073741824}, {0, 0}};
static const Multiplier SECONDS[] = {{'s', 1}, {'m', 60}, {'h', 3600}, {'d', 86400}, {0, 0}};
static const Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {'b', 1}, {'k', 1024}, {'g', 1073741824}, {0, 0}};
// Statically compute hash code of a string containing up to 12 [a-z] letters
#define HASH(s) ((s[0] & 31LL) | (s[1] & 31LL) << 5 | (s[2] & 31LL) << 10 | (s[3] & 31LL) << 15 | \
(s[4] & 31LL) << 20 | (s[5] & 31LL) << 25 | (s[6] & 31LL) << 30 | (s[7] & 31LL) << 35 | \
@@ -42,77 +37,7 @@ static const Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'
// Parses agent arguments.
// The format of the string is:
// arg[,arg...]
// where arg is one of the following options:
// start - start profiling
// resume - start or resume profiling without resetting collected data
// stop - stop profiling
// dump - dump collected data without stopping profiling session
// check - check if the specified profiling event is available
// status - print profiling status (inactive / running for X seconds)
// meminfo - print profiler memory stats
// list - show the list of available profiling events
// version - display the agent version
// event=EVENT - which event to trace (cpu, wall, cache-misses, etc.)
// alloc[=BYTES] - profile allocations with BYTES interval
// live - build allocation profile from live objects only
// lock[=DURATION] - profile contended locks overflowing the DURATION ns bucket (default: 10us)
// wall[=NS] - run wall clock profiling together with CPU profiling
// nobatch - legacy wall clock sampling without batch events
// collapsed - dump collapsed stacks (the format used by FlameGraph script)
// flamegraph - produce Flame Graph in HTML format
// tree - produce call tree in HTML format
// jfr - dump events in Java Flight Recorder format
// jfropts=OPTIONS - JFR recording options: numeric bitmask or 'mem'
// jfrsync[=CONFIG] - start Java Flight Recording with the given config along with the profiler
// traces[=N] - dump top N call traces
// flat[=N] - dump top N methods (aka flat profile)
// samples - count the number of samples (default)
// total - count the total value (time, bytes, etc.) instead of samples
// chunksize=N - approximate size of JFR chunk in bytes (default: 100 MB)
// chunktime=N - duration of JFR chunk in seconds (default: 1 hour)
// timeout=TIME - automatically stop profiler at TIME (absolute or relative)
// loop=TIME - run profiler in a loop (continuous profiling)
// interval=N - sampling interval in ns (default: 10'000'000, i.e. 10 ms)
// jstackdepth=N - maximum Java stack depth (default: 2048)
// signal=N - use alternative signal for cpu or wall clock profiling
// features=LIST - advanced stack trace features (vtable, comptask, pcaddr)
// safemode=BITS - disable stack recovery techniques (default: 0, i.e. everything enabled)
// file=FILENAME - output file name for dumping
// log=FILENAME - log warnings and errors to the given dedicated stream
// loglevel=LEVEL - logging level: TRACE, DEBUG, INFO, WARN, ERROR, or NONE
// quiet - do not log "Profiling started/stopped" message
// server=ADDRESS - start insecure HTTP server at ADDRESS/PORT
// filter=FILTER - thread filter
// threads - profile different threads separately
// sched - group threads by scheduling policy
// cstack=MODE - how to collect C stack frames in addition to Java stack
// MODE is 'fp', 'dwarf', 'lbr', 'vm' or 'no'
// clock=SOURCE - clock source for JFR timestamps: 'tsc' or 'monotonic'
// alluser - include only user-mode events
// fdtransfer - use fdtransfer to pass fds to the profiler
// target-cpu=CPU - sample threads on a specific CPU (perf_events only, default: -1)
// simple - simple class names instead of FQN
// dot - dotted class names
// norm - normalize names of hidden classes / lambdas
// sig - print method signatures
// ann - annotate Java methods
// lib - prepend library names
// mcache - max age of jmethodID cache (default: 0 = disabled)
// include=PATTERN - include stack traces containing PATTERN
// exclude=PATTERN - exclude stack traces containing PATTERN
// begin=FUNCTION - begin profiling when FUNCTION is executed
// end=FUNCTION - end profiling when FUNCTION is executed
// nostop - do not stop profiling outside --begin/--end window
// title=TITLE - FlameGraph title
// minwidth=PCT - FlameGraph minimum frame width in percent
// reverse - generate stack-reversed FlameGraph / Call tree (defaults to icicle graph)
// inverted - toggles the layout for reversed stacktraces from icicle to flamegraph
// and for default stacktraces from flamegraph to icicle
//
// It is possible to specify multiple dump options at the same time
// The format of the string is: arg[,arg...]
Error Arguments::parse(const char* args) {
if (args == NULL) {
return Error::OK;
@@ -146,14 +71,11 @@ Error Arguments::parse(const char* args) {
CASE("dump")
_action = ACTION_DUMP;
CASE("check")
_action = ACTION_CHECK;
CASE("status")
_action = ACTION_STATUS;
CASE("meminfo")
_action = ACTION_MEMINFO;
CASE("metrics")
_action = ACTION_METRICS;
CASE("list")
_action = ACTION_LIST;
@@ -197,6 +119,9 @@ Error Arguments::parse(const char* args) {
_output = OUTPUT_TEXT;
_dump_flat = value == NULL ? INT_MAX : atoi(value);
CASE("otlp")
_output = OUTPUT_OTLP;
CASE("samples")
_counter = COUNTER_SAMPLES;
@@ -223,7 +148,9 @@ Error Arguments::parse(const char* args) {
if (_nativemem < 0) _nativemem = 0;
} else if (strcmp(value, EVENT_LOCK) == 0) {
if (_lock < 0) _lock = DEFAULT_LOCK_INTERVAL;
} else if (_event != NULL) {
} else if (strcmp(value, EVENT_NATIVELOCK) == 0) {
if (_nativelock < 0) _nativelock = DEFAULT_LOCK_INTERVAL;
} else if (_event != NULL && !_all) {
msg = "Duplicate event argument";
} else {
_event = value;
@@ -235,26 +162,40 @@ Error Arguments::parse(const char* args) {
}
CASE("loop")
_loop = true;
if (value == NULL || (_timeout = parseTimeout(value)) == -1) {
if (value == NULL || (_loop = parseTimeout(value)) == -1) {
msg = "Invalid loop duration";
}
CASE("memlimit")
_mem_limit = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("alloc")
_alloc = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("tlab")
_tlab = true;
CASE("nativemem")
_nativemem = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("nofree")
_nofree = true;
CASE("trace")
_trace.push_back(value);
CASE("lock")
_lock = value == NULL ? 0 : parseUnits(value, NANOS);
_lock = value == NULL ? DEFAULT_LOCK_INTERVAL : parseUnits(value, NANOS);
CASE("nativelock")
_nativelock = value == NULL ? DEFAULT_LOCK_INTERVAL : parseUnits(value, NANOS);
CASE("wall")
_wall = value == NULL ? 0 : parseUnits(value, NANOS);
CASE("proc")
_proc = value == NULL ? DEFAULT_PROC_INTERVAL : parseUnits(value, SECONDS);
CASE("cpu")
if (_event != NULL) {
msg = "Duplicate event argument";
@@ -262,6 +203,33 @@ Error Arguments::parse(const char* args) {
_event = EVENT_CPU;
}
CASE("all")
_all = true;
_live = true;
if (_wall < 0) {
_wall = 0;
}
if (_alloc < 0) {
_alloc = 0;
}
if (_lock < 0) {
_lock = DEFAULT_LOCK_INTERVAL;
}
if (_nativelock < 0) {
_nativelock = DEFAULT_LOCK_INTERVAL;
}
if (_nativemem < 0) {
_nativemem = DEFAULT_ALLOC_INTERVAL;
}
if (_proc < 0 && OS::isLinux()) {
_proc = DEFAULT_PROC_INTERVAL;
}
if (_event == NULL && OS::isLinux()) {
_event = EVENT_CPU;
}
CASE("interval")
if (value == NULL || (_interval = parseUnits(value, UNIVERSAL)) <= 0) {
msg = "Invalid interval";
@@ -270,6 +238,9 @@ Error Arguments::parse(const char* args) {
CASE("jstackdepth")
if (value == NULL || (_jstackdepth = atoi(value)) <= 0) {
msg = "jstackdepth must be > 0";
} else {
char* slash = strchr(value, '/');
_truncated_stack_depth = slash != NULL ? atoi(slash + 1) : _jstackdepth;
}
CASE("signal")
@@ -283,23 +254,14 @@ Error Arguments::parse(const char* args) {
CASE("features")
if (value != NULL) {
if (strstr(value, "stats")) _features.stats = 1;
if (strstr(value, "probesp")) _features.probe_sp = 1;
if (strstr(value, "jnienv")) _features.jnienv = 1;
if (strstr(value, "agct")) _features.agct = 1;
if (strstr(value, "mixed")) _features.mixed = 1;
if (strstr(value, "vtable")) _features.vtable_target = 1;
if (strstr(value, "comptask")) _features.comp_task = 1;
if (strstr(value, "pcaddr")) _features.pc_addr = 1;
}
CASE("safemode") {
// Left for compatibility purpose; will be eventually migrated to 'features'
int bits = value == NULL ? INT_MAX : (int)strtol(value, NULL, 0);
_features.unknown_java = (bits & 1) ? 0 : 1;
_features.unwind_stub = (bits & 2) ? 0 : 1;
_features.unwind_comp = (bits & 4) ? 0 : 1;
_features.unwind_native = (bits & 8) ? 0 : 1;
_features.java_anchor = (bits & 16) ? 0 : 1;
_features.gc_traces = (bits & 32) ? 0 : 1;
}
CASE("file")
if (value == NULL || value[0] == 0) {
msg = "file must not be empty";
@@ -336,12 +298,10 @@ Error Arguments::parse(const char* args) {
_filter = value == NULL ? "" : value;
CASE("include")
// Workaround -Wstringop-overflow warning
if (value == arg + 8) appendToEmbeddedList(_include, arg + 8);
_include.push_back(value);
CASE("exclude")
// Workaround -Wstringop-overflow warning
if (value == arg + 8) appendToEmbeddedList(_exclude, arg + 8);
_exclude.push_back(value);
CASE("threads")
_threads = true;
@@ -349,6 +309,9 @@ Error Arguments::parse(const char* args) {
CASE("sched")
_sched = true;
CASE("record-cpu")
_record_cpu = true;
CASE("live")
_live = true;
@@ -364,12 +327,12 @@ Error Arguments::parse(const char* args) {
_cstack = CSTACK_FP;
} else if (strcmp(value, "dwarf") == 0) {
_cstack = CSTACK_DWARF;
} else if (strcmp(value, "lbr") == 0) {
_cstack = CSTACK_LBR;
} else if (strcmp(value, "vm") == 0) {
_cstack = CSTACK_VM;
} else if (strcmp(value, "vmx") == 0) {
_cstack = CSTACK_VMX;
// cstack=vmx is a shorthand for cstack=vm,features=mixed
_cstack = CSTACK_VM;
_features.mixed = 1;
} else {
_cstack = CSTACK_NO;
}
@@ -420,6 +383,13 @@ Error Arguments::parse(const char* args) {
CASE("nostop")
_nostop = true;
CASE("ttsp")
if (_begin != NULL || _end != NULL) {
msg = "begin and end must both be empty when ttsp is set";
}
_begin = "SafepointSynchronize::begin";
_end = "RuntimeService::record_safepoint_synchronized";
// FlameGraph options
CASE("title")
_title = value;
@@ -443,7 +413,7 @@ Error Arguments::parse(const char* args) {
return Error(msg);
}
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0 && _nativemem < 0) {
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0 && _nativemem < 0 && _nativelock < 0 && _trace.empty()) {
_event = EVENT_CPU;
}
@@ -475,12 +445,6 @@ bool Arguments::hasTemporaryLog() const {
return _log != NULL && strncmp(_log, "/tmp/asprof-log.", 16) == 0;
}
// The linked list of string offsets is embedded right into _buf array
void Arguments::appendToEmbeddedList(int& list, char* value) {
((int*)value)[-1] = list;
list = (int)(value - _buf);
}
// Should match statically computed HASH(arg)
long long Arguments::hash(const char* arg) {
long long h = 0;

View File

@@ -7,17 +7,20 @@
#define _ARGUMENTS_H
#include <stddef.h>
#include <vector>
const long DEFAULT_INTERVAL = 10000000; // 10 ms
const long DEFAULT_ALLOC_INTERVAL = 524287; // 512 KiB
const long DEFAULT_LOCK_INTERVAL = 10000; // 10 us
const long DEFAULT_PROC_INTERVAL = 30; // 30 seconds
const int DEFAULT_JSTACKDEPTH = 2048;
const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_NATIVEMEM = "nativemem";
const char* const EVENT_LOCK = "lock";
const char* const EVENT_NATIVELOCK = "nativelock";
const char* const EVENT_WALL = "wall";
const char* const EVENT_CTIMER = "ctimer";
const char* const EVENT_ITIMER = "itimer";
@@ -30,9 +33,8 @@ enum SHORT_ENUM Action {
ACTION_RESUME,
ACTION_STOP,
ACTION_DUMP,
ACTION_CHECK,
ACTION_STATUS,
ACTION_MEMINFO,
ACTION_METRICS,
ACTION_LIST,
ACTION_VERSION
};
@@ -58,9 +60,7 @@ enum SHORT_ENUM CStack {
CSTACK_NO, // do not collect native frames
CSTACK_FP, // walk stack using Frame Pointer links
CSTACK_DWARF, // use DWARF unwinding info from .eh_frame section
CSTACK_LBR, // Last Branch Record hardware capability
CSTACK_VM, // unwind using HotSpot VMStructs
CSTACK_VMX // same as CSTACK_VM but with intermediate native frames
CSTACK_VM // unwind using HotSpot VMStructs
};
enum SHORT_ENUM Clock {
@@ -76,7 +76,8 @@ enum SHORT_ENUM Output {
OUTPUT_COLLAPSED,
OUTPUT_FLAMEGRAPH,
OUTPUT_TREE,
OUTPUT_JFR
OUTPUT_JFR,
OUTPUT_OTLP
};
enum JfrOption {
@@ -91,28 +92,27 @@ enum JfrOption {
JFR_SYNC_OPTS = NO_SYSTEM_INFO | NO_SYSTEM_PROPS | NO_NATIVE_LIBS | NO_CPU_LOAD | NO_HEAP_SUMMARY
};
// Keep this in sync with JfrSync.java
enum EventMask {
EM_CPU = 1,
EM_ALLOC = 2,
EM_LOCK = 4,
EM_WALL = 8,
EM_NATIVEMEM = 16,
EM_NATIVELOCK = 32,
EM_METHOD_TRACE = 64
};
constexpr int EVENT_MASK_SIZE = 7;
struct StackWalkFeatures {
// Stack recovery techniques used to workaround AsyncGetCallTrace flaws
unsigned short unknown_java : 1;
unsigned short unwind_stub : 1;
unsigned short unwind_comp : 1;
unsigned short unwind_native : 1;
unsigned short java_anchor : 1;
unsigned short gc_traces : 1;
// Common features
unsigned short stats : 1;
// Additional HotSpot-specific features
unsigned short probe_sp : 1;
unsigned short vtable_target : 1;
unsigned short comp_task : 1;
unsigned short pc_addr : 1;
unsigned short _reserved : 5;
StackWalkFeatures() : unknown_java(1), unwind_stub(1), unwind_comp(1), unwind_native(1), java_anchor(1), gc_traces(1),
stats(0), probe_sp(0), vtable_target(0), comp_task(0), pc_addr(0), _reserved(0) {
}
unsigned short stats : 1; // collect stack walking duration statistics
unsigned short jnienv : 1; // verify JNIEnv* obtained using VMStructs
unsigned short agct : 1; // force usage of AsyncGetCallTrace instead of VMStructs
unsigned short mixed : 1; // mixed stack traces with Java and native frames interleaved
unsigned short vtable_target : 1; // show receiver classes of vtable/itable stubs
unsigned short comp_task : 1; // display current compilation task for JIT threads
unsigned short pc_addr : 1; // record exact PC address for each sample
unsigned short _padding : 9; // pad structure to 16 bits
};
@@ -121,6 +121,10 @@ struct Multiplier {
long multiplier;
};
constexpr Multiplier NANOS[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {0, 0}};
constexpr Multiplier BYTES[] = {{'b', 1}, {'k', 1024}, {'m', 1048576}, {'g', 1073741824}, {0, 0}};
constexpr Multiplier SECONDS[] = {{'s', 1}, {'m', 60}, {'h', 3600}, {'d', 86400}, {0, 0}};
constexpr Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'s', 1000000000}, {'b', 1}, {'k', 1024}, {'g', 1073741824}, {0, 0}};
class Error {
private:
@@ -147,25 +151,30 @@ class Arguments {
char* _buf;
bool _shared;
void appendToEmbeddedList(int& list, char* value);
const char* expandFilePattern(const char* pattern);
static long long hash(const char* arg);
static Output detectOutputFormat(const char* file);
static long parseUnits(const char* str, const Multiplier* multipliers);
static int parseTimeout(const char* str);
public:
Action _action;
Counter _counter;
const char* _event;
std::vector<const char*> _trace;
int _timeout;
int _loop;
size_t _mem_limit;
long _interval;
long _alloc;
long _nativemem;
long _lock;
long _nativelock;
long _wall;
long _proc;
bool _all;
int _jstackdepth;
int _truncated_stack_depth;
int _signal;
const char* _file;
const char* _log;
@@ -173,14 +182,15 @@ class Arguments {
const char* _unknown_arg;
const char* _server;
const char* _filter;
int _include;
int _exclude;
std::vector<const char*> _include;
std::vector<const char*> _exclude;
unsigned char _mcache;
bool _loop;
bool _preloaded;
bool _quiet;
bool _threads;
bool _sched;
bool _record_cpu;
bool _tlab;
bool _live;
bool _nofree;
bool _nobatch;
@@ -215,13 +225,20 @@ class Arguments {
_action(ACTION_NONE),
_counter(COUNTER_SAMPLES),
_event(NULL),
_trace(),
_timeout(0),
_loop(0),
_mem_limit(0),
_interval(0),
_alloc(-1),
_nativemem(-1),
_lock(-1),
_nativelock(-1),
_wall(-1),
_proc(-1),
_all(false),
_jstackdepth(DEFAULT_JSTACKDEPTH),
_truncated_stack_depth(DEFAULT_JSTACKDEPTH),
_signal(0),
_file(NULL),
_log(NULL),
@@ -229,14 +246,15 @@ class Arguments {
_unknown_arg(NULL),
_server(NULL),
_filter(NULL),
_include(0),
_exclude(0),
_include(),
_exclude(),
_mcache(0),
_loop(false),
_preloaded(false),
_quiet(false),
_threads(false),
_sched(false),
_record_cpu(false),
_tlab(false),
_live(false),
_nofree(false),
_nobatch(false),
@@ -246,7 +264,7 @@ class Arguments {
_fdtransfer_path(NULL),
_target_cpu(-1),
_style(0),
_features(),
_features{},
_cstack(CSTACK_DEFAULT),
_clock(CLK_DEFAULT),
_output(OUTPUT_NONE),
@@ -277,15 +295,24 @@ class Arguments {
bool hasOutputFile() const {
return _file != NULL &&
(_action == ACTION_STOP || _action == ACTION_DUMP ? _output != OUTPUT_JFR : _action >= ACTION_CHECK);
(_action == ACTION_STOP || _action == ACTION_DUMP ? _output != OUTPUT_JFR : _action >= ACTION_STATUS);
}
bool hasOption(JfrOption option) const {
return (_jfr_options & option) != 0;
}
friend class FrameName;
friend class Recording;
// Builds a bit set of EventMask flags describing which profiling engines are requested.
// An engine counts as requested when its option holds a non-negative interval
// (or, for CPU, when an event name is set; for method tracing, when any trace target is given).
int eventMask() const {
    int mask = 0;
    if (_event != NULL) {
        mask |= EM_CPU;
    }
    if (_alloc >= 0) {
        mask |= EM_ALLOC;
    }
    if (_lock >= 0) {
        mask |= EM_LOCK;
    }
    if (_wall >= 0) {
        mask |= EM_WALL;
    }
    if (_nativemem >= 0) {
        mask |= EM_NATIVEMEM;
    }
    if (_nativelock >= 0) {
        mask |= EM_NATIVELOCK;
    }
    if (!_trace.empty()) {
        mask |= EM_METHOD_TRACE;
    }
    return mask;
}
static long parseUnits(const char* str, const Multiplier* multipliers);
};
extern Arguments _global_args;

View File

@@ -6,7 +6,9 @@
#include "asprof.h"
#include "hooks.h"
#include "profiler.h"
#include "tsc.h"
#include "threadLocalData.h"
#include "userEvents.h"
static asprof_error_t asprof_error(const char* msg) {
return (asprof_error_t)msg;
@@ -53,3 +55,23 @@ DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t out
DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void) {
return ThreadLocalData::getThreadLocalData();
}
// Exported C entry point: registers a user-defined JFR event type by name.
// Simply forwards to UserEvents::registerEvent() and returns its result as the event key.
DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name) {
return UserEvents::registerEvent(name);
}
// Stringification helpers. The # operator does not macro-expand its operand,
// so a value macro must go through asprof_xstr() to be expanded first:
// asprof_str(ASPROF_MAX_JFR_EVENT_LENGTH) would yield the macro's name, not its value.
#define asprof_str(s) #s
#define asprof_xstr(s) asprof_str(s)

// Exported C entry point: records a user-defined JFR event.
//   type - event key, as returned by asprof_register_jfr_event()
//   data - pointer to the event payload
//   len  - payload length in bytes; must not exceed ASPROF_MAX_JFR_EVENT_LENGTH
// Returns NULL on success, or an error message if the payload is too large.
DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len) {
    if (len > ASPROF_MAX_JFR_EVENT_LENGTH) {
        return asprof_error("Unable to emit JFR event larger than " asprof_xstr(ASPROF_MAX_JFR_EVENT_LENGTH) " bytes");
    }

    // The event is timestamped at the moment of the call
    UserEvent event;
    event._start_time = TSC::ticks();
    event._type = type;
    event._data = data;
    event._len = len;
    Profiler::instance()->recordEventOnly(USER_EVENT, &event);
    return NULL;
}

View File

@@ -63,6 +63,42 @@ typedef struct {
DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void);
typedef asprof_thread_local_data* (*asprof_get_thread_local_data_t)(void);
typedef int asprof_jfr_event_key;
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
//
// Returns an asprof_jfr_event_key identifier for a user-defined JFR event type.
// That identifier can then be used in `asprof_emit_jfr_event`
//
// The name is required to be valid (since it's a C string, NUL-free) UTF-8.
//
// Returns -1 on failure.
DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name);
typedef asprof_jfr_event_key (*asprof_register_jfr_event_t)(const char* name);
#define ASPROF_MAX_JFR_EVENT_LENGTH 2048
// This API is UNSTABLE and might change or be removed in the next version of async-profiler.
//
// Emits a custom, user-defined JFR event. The key should be created via `asprof_register_jfr_event`.
// The data can be arbitrary binary data, with size <= ASPROF_MAX_JFR_EVENT_LENGTH.
//
// User-defined events are included in the JFR under a `profiler.UserEvent` event type. That type will contain
// (at least) the following fields:
// 1. `startTime` [Long] - the emitted event's time in ticks.
// 2. `eventThread` [java.lang.Thread] - the thread that emitted the event.
// 3. `type` [profiler.types.UserEventType] - the event's type,
// where `profiler.types.UserEventType` is an indexed string from the JFR constant pool.
// 4. `data` [String] - the event data. This is the Latin-1 encoded version of the inputted data.
// The Latin-1 encoding is used as a way to stuff the arbitrary byte input into something
// that JFR supports (JFR technically supports byte arrays, but `jfr print` doesn't).
//
// Returns an error code or NULL on success.
DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len);
typedef asprof_error_t (*asprof_emit_jfr_event_t)(asprof_jfr_event_key type, const uint8_t* data, size_t len);
#ifdef __cplusplus
}
#endif

View File

@@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <string.h>
#include "callTraceStorage.h"
#include "os.h"
@@ -12,6 +13,7 @@
static const u32 INITIAL_CAPACITY = 65536;
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
static const size_t MEM_LIMIT_EXTRA = 0x10000; // reserve up to 64 KB for LongHashTable headers
class LongHashTable {
@@ -83,6 +85,8 @@ CallTrace CallTraceStorage::_overflow_trace = {1, {BCI_ERROR, LP64_ONLY(0 COMMA)
CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK) {
_current_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);
_used_memory = _current_table->usedMemory();
_mem_limit = SIZE_MAX;
_overflow = 0;
}
@@ -92,12 +96,14 @@ CallTraceStorage::~CallTraceStorage() {
}
}
void CallTraceStorage::clear() {
void CallTraceStorage::clear(size_t mem_limit) {
while (_current_table->prev() != NULL) {
_current_table = _current_table->destroy();
}
_current_table->clear();
_used_memory = _current_table->usedMemory();
_allocator.clear();
_mem_limit = mem_limit ? mem_limit | MEM_LIMIT_EXTRA : SIZE_MAX;
_overflow = 0;
}
@@ -108,11 +114,7 @@ u32 CallTraceStorage::capacity() {
}
size_t CallTraceStorage::usedMemory() {
size_t bytes = _allocator.usedMemory();
for (LongHashTable* table = _current_table; table != NULL; table = table->prev()) {
bytes += table->usedMemory();
}
return bytes;
return _used_memory + _allocator.usedMemory();
}
void CallTraceStorage::collectTraces(std::map<u32, CallTrace*>& map) {
@@ -241,15 +243,23 @@ u32 CallTraceStorage::put(int num_frames, ASGCT_CallFrame* frames, u64 counter)
while (keys[slot] != hash) {
if (keys[slot] == 0) {
if (usedMemory() > _mem_limit) {
// Stop adding new stack traces once memory limit is exceeded
atomicInc(_overflow);
return OVERFLOW_TRACE_ID;
}
if (!__sync_bool_compare_and_swap(&keys[slot], 0, hash)) {
continue;
}
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table.
// This condition can be hit only once per table, so the below allocation is race-free.
if (table->incSize() == capacity * 3 / 4) {
LongHashTable* new_table = LongHashTable::allocate(table, capacity * 2);
if (new_table != NULL) {
__sync_bool_compare_and_swap(&_current_table, table, new_table);
atomicInc(_used_memory, new_table->usedMemory());
storeRelease(_current_table, new_table);
}
}

View File

@@ -26,11 +26,11 @@ struct CallTraceSample {
u64 counter;
CallTrace* acquireTrace() {
return __atomic_load_n(&trace, __ATOMIC_ACQUIRE);
return loadAcquire(trace);
}
void setTrace(CallTrace* value) {
return __atomic_store_n(&trace, value, __ATOMIC_RELEASE);
storeRelease(trace, value);
}
CallTraceSample& operator+=(const CallTraceSample& s) {
@@ -47,6 +47,8 @@ class CallTraceStorage {
LinearAllocator _allocator;
LongHashTable* _current_table;
size_t _used_memory;
size_t _mem_limit;
u64 _overflow;
u64 calcHash(int num_frames, ASGCT_CallFrame* frames);
@@ -57,9 +59,10 @@ class CallTraceStorage {
CallTraceStorage();
~CallTraceStorage();
void clear();
void clear(size_t mem_limit);
u32 capacity();
size_t usedMemory();
u64 overflow() { return _overflow; }
void collectTraces(std::map<u32, CallTrace*>& map);
void collectSamples(std::vector<CallTraceSample*>& samples);

View File

@@ -9,6 +9,7 @@
#include <sys/mman.h>
#include "codeCache.h"
#include "dwarf.h"
#include "log.h"
#include "os.h"
@@ -28,19 +29,22 @@ size_t NativeFunc::usedMemory(const char* name) {
}
CodeCache::CodeCache(const char* name, short lib_index, bool imports_patchable,
const void* min_address, const void* max_address) {
CodeCache::CodeCache(const char* name, short lib_index,
const void* min_address, const void* max_address,
const char* image_base) {
_name = NativeFunc::create(name, -1);
_lib_index = lib_index;
_min_address = min_address;
_max_address = max_address;
_text_base = NULL;
_image_base = image_base;
_plt_offset = 0;
_plt_size = 0;
memset(_imports, 0, sizeof(_imports));
_imports_patchable = imports_patchable;
_imports_patchable = false;
_debug_symbols = false;
_dwarf_table = NULL;
@@ -159,13 +163,19 @@ const void* CodeCache::findSymbolByPrefix(const char* prefix) {
}
const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) {
const void* result = NULL;
for (int i = 0; i < _count; i++) {
const char* blob_name = _blobs[i]._name;
if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) {
return _blobs[i]._start;
result = _blobs[i]._start;
// Symbols which contain a dot are only patched if no alternative is found,
// see #1247
if (strchr(blob_name + prefix_len, '.') == NULL) {
return result;
}
}
}
return NULL;
return result;
}
void CodeCache::saveImport(ImportId id, void** entry) {
@@ -209,6 +219,12 @@ void CodeCache::addImport(void** entry, const char* name) {
saveImport(im_pthread_create, entry);
} else if (strcmp(name, "pthread_exit") == 0) {
saveImport(im_pthread_exit, entry);
} else if (strcmp(name, "pthread_mutex_lock") == 0) {
saveImport(im_pthread_mutex_lock, entry);
} else if (strcmp(name, "pthread_rwlock_rdlock") == 0) {
saveImport(im_pthread_rwlock_rdlock, entry);
} else if (strcmp(name, "pthread_rwlock_wrlock") == 0) {
saveImport(im_pthread_rwlock_wrlock, entry);
} else if (strcmp(name, "pthread_setspecific") == 0) {
saveImport(im_pthread_setspecific, entry);
} else if (strcmp(name, "poll") == 0) {
@@ -228,15 +244,13 @@ void CodeCache::addImport(void** entry, const char* name) {
void** CodeCache::findImport(ImportId id) {
if (!_imports_patchable) {
makeImportsPatchable();
_imports_patchable = true;
}
return _imports[id][PRIMARY];
}
void CodeCache::patchImport(ImportId id, void* hook_func) {
if (!_imports_patchable) {
makeImportsPatchable();
_imports_patchable = true;
if (!_imports_patchable && !makeImportsPatchable()) {
return;
}
for (int ty = 0; ty < NUM_IMPORT_TYPES; ty++) {
@@ -247,7 +261,7 @@ void CodeCache::patchImport(ImportId id, void* hook_func) {
}
}
void CodeCache::makeImportsPatchable() {
bool CodeCache::makeImportsPatchable() {
void** min_import = (void**)-1;
void** max_import = NULL;
for (int i = 0; i < NUM_IMPORTS; i++) {
@@ -262,8 +276,14 @@ void CodeCache::makeImportsPatchable() {
if (max_import != NULL) {
uintptr_t patch_start = (uintptr_t)min_import & ~OS::page_mask;
uintptr_t patch_end = (uintptr_t)max_import & ~OS::page_mask;
mprotect((void*)patch_start, patch_end - patch_start + OS::page_size, PROT_READ | PROT_WRITE);
if (OS::mprotect((void*)patch_start, patch_end - patch_start + OS::page_size, PROT_READ | PROT_WRITE) != 0) {
Log::warn("Could not patch %s", name());
return false;
}
}
_imports_patchable = true;
return true;
}
void CodeCache::setDwarfTable(FrameDesc* table, int length) {
@@ -303,5 +323,5 @@ size_t CodeCache::usedMemory() {
for (int i = 0; i < _count; i++) {
bytes += NativeFunc::usedMemory(_blobs[i]._name);
}
return bytes;
return bytes + sizeof(CodeCache);
}

View File

@@ -7,6 +7,7 @@
#define _CODECACHE_H
#include <jvmti.h>
#include "arch.h"
#define NO_MIN_ADDRESS ((const void*)-1)
@@ -20,6 +21,9 @@ enum ImportId {
im_dlopen,
im_pthread_create,
im_pthread_exit,
im_pthread_mutex_lock,
im_pthread_rwlock_rdlock,
im_pthread_rwlock_wrlock,
im_pthread_setspecific,
im_poll,
im_malloc,
@@ -107,6 +111,7 @@ class CodeCache {
const void* _min_address;
const void* _max_address;
const char* _text_base;
const char* _image_base;
unsigned int _plt_offset;
unsigned int _plt_size;
@@ -123,15 +128,15 @@ class CodeCache {
CodeBlob* _blobs;
void expand();
void makeImportsPatchable();
bool makeImportsPatchable();
void saveImport(ImportId id, void** entry);
public:
CodeCache(const char* name,
short lib_index = -1,
bool imports_patchable = false,
const void* min_address = NO_MIN_ADDRESS,
const void* max_address = NO_MAX_ADDRESS);
const void* max_address = NO_MAX_ADDRESS,
const char* image_base = NULL);
~CodeCache();
@@ -147,6 +152,10 @@ class CodeCache {
return _max_address;
}
const char* imageBase() const {
return _image_base;
}
bool contains(const void* address) const {
return address >= _min_address && address < _max_address;
}
@@ -189,7 +198,7 @@ class CodeCache {
void addImport(void** entry, const char* name);
void** findImport(ImportId id);
void patchImport(ImportId, void* hook_func);
void patchImport(ImportId id, void* hook_func);
CodeBlob* findBlob(const char* name);
CodeBlob* findBlobByAddress(const void* address);
@@ -202,6 +211,8 @@ class CodeCache {
FrameDesc* findFrameDesc(const void* pc);
size_t usedMemory();
friend class UnloadProtection;
};
@@ -209,6 +220,7 @@ class CodeCacheArray {
private:
CodeCache* _libs[MAX_NATIVE_LIBS];
int _count;
size_t _used_memory;
public:
CodeCacheArray() : _count(0) {
@@ -219,13 +231,18 @@ class CodeCacheArray {
}
int count() {
return __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
return loadAcquire(_count);
}
size_t usedMemory() {
return _used_memory;
}
void add(CodeCache* lib) {
int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
int index = loadAcquire(_count);
_libs[index] = lib;
__atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE);
_used_memory += lib->usedMemory();
storeRelease(_count, index + 1);
}
};

View File

@@ -11,7 +11,7 @@ import java.util.*;
import java.util.regex.Pattern;
public class Arguments {
public String title = "Flame Graph";
public String title;
public String highlight;
public String output;
public String state;
@@ -19,17 +19,22 @@ public class Arguments {
public Pattern exclude;
public double minwidth;
public double grain;
public double tail = 0.1;
public int skip;
public boolean help;
public boolean reverse;
public boolean inverted;
public boolean diff;
public boolean cpu;
public boolean cpuTime;
public boolean wall;
public boolean alloc;
public boolean nativemem;
public boolean nativelock;
public boolean leak;
public boolean live;
public boolean lock;
public boolean trace;
public boolean threads;
public boolean classify;
public boolean total;
@@ -40,6 +45,7 @@ public class Arguments {
public boolean dot;
public long from;
public long to;
public long latency = -1;
public final List<String> files = new ArrayList<>();
public Arguments(String... args) {
@@ -47,7 +53,7 @@ public class Arguments {
String arg = args[i];
String fieldName;
if (arg.startsWith("--")) {
fieldName = arg.substring(2);
fieldName = toCamelCase(arg.substring(2));
} else if (arg.startsWith("-") && arg.length() == 2) {
fieldName = alias(arg.charAt(1));
} else {
@@ -69,7 +75,7 @@ public class Arguments {
} else if (type == int.class) {
f.setInt(this, Integer.parseInt(args[++i]));
} else if (type == double.class) {
f.setDouble(this, Double.parseDouble(args[++i]));
f.setDouble(this, parseRatio(args[++i]));
} else if (type == long.class) {
f.setLong(this, parseTimestamp(args[++i]));
} else if (type == Pattern.class) {
@@ -104,6 +110,21 @@ public class Arguments {
}
}
// Converts a dash-separated option name to camelCase, e.g. "cpu-time" -> "cpuTime".
// Dashes are processed right to left; a dash that is the very last character is left as is,
// since there is no following character to capitalize.
private static String toCamelCase(String name) {
    int dash = name.lastIndexOf('-', name.length() - 2);
    while (dash >= 0) {
        char capital = Character.toUpperCase(name.charAt(dash + 1));
        name = name.substring(0, dash) + capital + name.substring(dash + 2);
        dash = name.lastIndexOf('-', name.length() - 2);
    }
    return name;
}
// Accepts either a plain floating point number, or a percentage with a trailing '%'
// which is converted to a fraction (e.g. "50%" -> 0.5)
private static double parseRatio(String value) {
    int last = value.length() - 1;
    boolean isPercentage = last >= 0 && value.charAt(last) == '%';
    if (!isPercentage) {
        return Double.parseDouble(value);
    }
    return Double.parseDouble(value.substring(0, last)) / 100;
}
// Milliseconds or HH:mm:ss.S or yyyy-MM-dd'T'HH:mm:ss.S
private static long parseTimestamp(String time) {
if (time.indexOf(':') < 0) {

View File

@@ -0,0 +1,39 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.ArrayList;
/**
 * An {@link Index} that also keeps its keys in a list ordered by index,
 * so that a key can be retrieved back from its index.
 * <p>
 * The {@code empty} object passed to the constructor occupies position 0 of the reverse list.
 *
 * @param <T> type of the objects held in the container.
 */
public class BidirectionalIndex<T> extends Index<T> {
    private final ArrayList<T> reverseIndex;

    public BidirectionalIndex(Class<T> cls, T empty) {
        this(cls, empty, 256);
    }

    public BidirectionalIndex(Class<T> cls, T empty, int initialCapacity) {
        super(cls, empty, initialCapacity);
        reverseIndex = new ArrayList<>(initialCapacity);
        reverseIndex.add(empty);
    }

    /**
     * Returns the index of the key as reported by {@link Index#index}, recording the key
     * in the reverse list when the returned index is not covered by the list yet.
     */
    @Override
    public int index(T key) {
        assert super.size() == reverseIndex.size();

        int position = super.index(key);
        if (position >= reverseIndex.size()) {
            // NOTE(review): relies on Index.index() handing out the next sequential index for a new key
            assert position == reverseIndex.size();
            reverseIndex.add(key);
        }
        return position;
    }

    /**
     * Returns the key stored at the given index.
     */
    public T getKey(int idx) {
        return reverseIndex.get(idx);
    }
}

View File

@@ -20,18 +20,24 @@ public class FlameGraph implements Comparator<Frame> {
private static final String[] FRAME_SUFFIX = {"_[0]", "_[j]", "_[i]", "", "", "_[k]", "_[1]"};
private static final byte HAS_SUFFIX = (byte) 0x80;
private static final int FLUSH_THRESHOLD = 15000;
private static final long NEW_FRAME_DIFF = Long.MIN_VALUE;
private static final Pattern TID_FRAME_PATTERN = Pattern.compile("\\[(.* )?tid=\\d+]");
private final Arguments args;
private final Index<String> cpool = new Index<>(String.class, "");
private final Frame root = new Frame(0, TYPE_NATIVE);
private final StringBuilder outbuf = new StringBuilder(FLUSH_THRESHOLD + 1000);
private String title = "Flame Graph";
private int[] order;
private int[] cpoolMap;
private int depth;
private int lastLevel;
private long lastX;
private long lastTotal;
private long lastDiff;
private long mintotal;
private long maxdiff = -1;
public FlameGraph(Arguments args) {
this.args = args;
@@ -71,7 +77,11 @@ public class FlameGraph implements Comparator<Frame> {
boolean needRebuild = args.reverse || args.include != null || args.exclude != null;
try (BufferedReader br = new BufferedReader(in)) {
while (!br.readLine().startsWith("const cpool")) ;
for (String line; !(line = br.readLine()).startsWith("const cpool"); ) {
if (line.startsWith("<h1")) {
title = line.substring(line.indexOf('>') + 1, line.lastIndexOf("</h1>"));
}
}
br.readLine();
String s = "";
@@ -84,6 +94,8 @@ public class FlameGraph implements Comparator<Frame> {
while (!br.readLine().isEmpty()) ;
for (String line; !(line = br.readLine()).isEmpty(); ) {
if (line.startsWith("d=")) continue; // artifact of a differential flame graph
StringTokenizer st = new StringTokenizer(line.substring(2, line.length() - 1), ",");
int nameAndType = Integer.parseInt(st.nextToken());
@@ -103,12 +115,10 @@ public class FlameGraph implements Comparator<Frame> {
int titleIndex = nameAndType >>> 3;
byte type = (byte) (nameAndType & 7);
if (st.hasMoreTokens() && (type <= TYPE_INLINED || type >= TYPE_C1_COMPILED)) {
type = TYPE_JIT_COMPILED;
}
byte normalizedType = type <= TYPE_INLINED || type >= TYPE_C1_COMPILED ? TYPE_JIT_COMPILED : type;
Frame f = level > 0 || needRebuild ? new Frame(titleIndex, type) : root;
f.self = f.total = total;
Frame f = level > 0 || needRebuild ? new Frame(titleIndex, normalizedType) : root;
fillFrameCounters(f, type, total);
if (st.hasMoreTokens()) f.inlined = Long.parseLong(st.nextToken());
if (st.hasMoreTokens()) f.c1 = Long.parseLong(st.nextToken());
if (st.hasMoreTokens()) f.interpreted = Long.parseLong(st.nextToken());
@@ -171,6 +181,32 @@ public class FlameGraph implements Comparator<Frame> {
depth = Math.max(depth, stack.size);
}
/**
 * Computes per-frame differences of this flame graph against the given base flame graph.
 */
public void diff(FlameGraph base) {
    // For every title index of this cpool, find the index of the same title
    // in the base cpool, or -1 if the base flame graph does not have such title
    String[] titles = cpool.keys();
    int[] mapping = new int[titles.length];
    for (int i = 0; i < titles.length; i++) {
        mapping[i] = base.cpool.getOrDefault(titles[i], -1);
    }
    cpoolMap = mapping;

    diff(base.root, root);
}
// Recursively annotates 'current' and its descendants with the difference of self counters
// relative to the matching frame in the base tree. Frames without a counterpart in the base tree
// (base == null) are marked with NEW_FRAME_DIFF.
private void diff(Frame base, Frame current) {
    if (base == null) {
        current.diff = NEW_FRAME_DIFF;
    } else {
        current.diff = current.self - base.self;
    }

    // Math.abs(Long.MIN_VALUE) is Long.MIN_VALUE itself, so the NEW_FRAME_DIFF marker never raises maxdiff
    maxdiff = Math.max(maxdiff, Math.abs(current.diff));

    for (Frame child : current.values()) {
        Frame baseChild = base != null ? base.get(translateKey(child.key)) : null;
        diff(baseChild, child);
    }
}
// Converts a frame key of this flame graph to the corresponding key of the base flame graph:
// the title index bits are replaced through cpoolMap, all other bits of the key are kept.
// If cpoolMap holds -1 for the title (not present in base), the result is -1 as a whole.
private int translateKey(int key) {
return cpoolMap[key & TITLE_MASK] | (key & ~TITLE_MASK);
}
// Writes the flame graph to the given stream as UTF-8 text by delegating to dump(PrintStream).
// The stream is wrapped in a PrintStream without auto-flush; try-with-resources closes
// the wrapper (and thus the underlying stream) when done.
public void dump(OutputStream out) throws IOException {
try (PrintStream ps = new PrintStream(out, false, "UTF-8")) {
dump(ps);
}
}
public void dump(PrintStream out) {
mintotal = (long) (root.total * args.minwidth / 100);
@@ -186,13 +222,16 @@ public class FlameGraph implements Comparator<Frame> {
out.print(Math.min(depth * 16, 32767));
tail = printTill(out, tail, "/*title:*/");
out.print(args.title);
out.print(args.title != null ? args.title : title);
// inverted toggles the layout for reversed stacktraces from icicle to flamegraph
// and for default stacktraces from flamegraphs to icicle.
tail = printTill(out, tail, "/*inverted:*/false");
out.print(args.reverse ^ args.inverted);
tail = printTill(out, tail, "/*maxdiff:*/-1");
out.print(maxdiff);
tail = printTill(out, tail, "/*depth:*/0");
out.print(depth);
@@ -227,6 +266,15 @@ public class FlameGraph implements Comparator<Frame> {
}
private void printFrame(PrintStream out, Frame frame, int level, long x) {
StringBuilder sb = outbuf;
if (frame.diff != lastDiff) {
if (frame.diff == NEW_FRAME_DIFF) {
sb.append("d=U\n");
} else {
sb.append("d=").append(frame.diff).append('\n');
}
}
int nameAndType = order[frame.getTitleIndex()] << 3 | frame.getType();
boolean hasExtraTypes = (frame.inlined | frame.c1 | frame.interpreted) != 0 &&
frame.inlined < frame.total && frame.interpreted < frame.total;
@@ -238,7 +286,7 @@ public class FlameGraph implements Comparator<Frame> {
func = 'n';
}
StringBuilder sb = outbuf.append(func).append('(').append(nameAndType);
sb.append(func).append('(').append(nameAndType);
if (func == 'f') {
sb.append(',').append(level).append(',').append(x - lastX);
}
@@ -258,6 +306,7 @@ public class FlameGraph implements Comparator<Frame> {
lastLevel = level;
lastX = x;
lastTotal = frame.total;
lastDiff = frame.diff;
Frame[] children = frame.values().toArray(EMPTY_FRAME_ARRAY);
Arrays.sort(children, this);
@@ -279,6 +328,9 @@ public class FlameGraph implements Comparator<Frame> {
sb.append(strings[frame.getTitleIndex()]).append(FRAME_SUFFIX[frame.getType()]);
if (frame.self > 0) {
int tmpLength = sb.length();
if (maxdiff >= 0) {
sb.append(' ').append(frame.diff == NEW_FRAME_DIFF ? 0 : frame.self - frame.diff);
}
out.print(sb.append(' ').append(frame.self).append('\n'));
sb.setLength(tmpLength);
}
@@ -316,6 +368,21 @@ public class FlameGraph implements Comparator<Frame> {
return include != null;
}
// Initializes counters of a freshly parsed frame: both self and total are set to 'ticks',
// and for interpreted, inlined or C1-compiled frames the matching per-type counter gets 'ticks' too.
// Other frame types leave per-type counters untouched.
private static void fillFrameCounters(Frame frame, byte type, long ticks) {
    frame.total = ticks;
    frame.self = ticks;

    if (type == TYPE_INTERPRETED) {
        frame.interpreted = ticks;
    } else if (type == TYPE_INLINED) {
        frame.inlined = ticks;
    } else if (type == TYPE_C1_COMPILED) {
        frame.c1 = ticks;
    }
}
private Frame addChild(Frame frame, String title, byte type, long ticks) {
frame.total += ticks;
@@ -390,7 +457,7 @@ public class FlameGraph implements Comparator<Frame> {
return order[f1.getTitleIndex()] - order[f2.getTitleIndex()];
}
public static void convert(String input, String output, Arguments args) throws IOException {
public static FlameGraph parse(String input, Arguments args) throws IOException {
FlameGraph fg = new FlameGraph(args);
try (InputStreamReader in = new InputStreamReader(new FileInputStream(input), StandardCharsets.UTF_8)) {
if (input.endsWith(".html")) {
@@ -399,6 +466,11 @@ public class FlameGraph implements Comparator<Frame> {
fg.parseCollapsed(in);
}
}
return fg;
}
public static void convert(String input, String output, Arguments args) throws IOException {
FlameGraph fg = parse(input, args);
try (PrintStream out = new PrintStream(output, "UTF-8")) {
fg.dump(out);
}

View File

@@ -16,11 +16,13 @@ public class Frame extends HashMap<Integer, Frame> {
public static final byte TYPE_KERNEL = 5;
public static final byte TYPE_C1_COMPILED = 6;
private static final int TYPE_SHIFT = 28;
static final int TYPE_SHIFT = 28;
static final int TITLE_MASK = (1 << TYPE_SHIFT) - 1;
final int key;
long total;
long self;
long diff;
long inlined, c1, interpreted;
private Frame(int key) {
@@ -36,7 +38,7 @@ public class Frame extends HashMap<Integer, Frame> {
}
int getTitleIndex() {
return key & ((1 << TYPE_SHIFT) - 1);
return key & TITLE_MASK;
}
byte getType() {

View File

@@ -8,6 +8,16 @@ package one.convert;
import java.lang.reflect.Array;
import java.util.HashMap;
/**
* Container which records the index of appearance of the value it holds.
* <p>
* Allows retrieving the index of a given object in constant time, as well as
* an ordered list of all values seen.
* <p>
* The object at index 0 is always the empty object.
*
* @param <T> type of the objects held in the container.
*/
public class Index<T> extends HashMap<T, Integer> {
private final Class<T> cls;
@@ -35,13 +45,9 @@ public class Index<T> extends HashMap<T, Integer> {
@SuppressWarnings("unchecked")
public T[] keys() {
T[] result = (T[]) Array.newInstance(cls, size());
keys(result);
return result;
}
public void keys(T[] result) {
for (Entry<T, Integer> entry : entrySet()) {
result[entry.getValue()] = entry.getKey();
}
return result;
}
}

View File

@@ -5,16 +5,14 @@
package one.convert;
import one.jfr.ClassRef;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.MethodRef;
import one.jfr.*;
import one.jfr.event.*;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.Map;
import java.util.regex.Pattern;
import static one.convert.Frame.*;
@@ -29,18 +27,19 @@ public abstract class JfrConverter extends Classifier {
this.args = args;
EventCollector collector = createCollector(args);
this.collector = args.nativemem && args.leak ? new MallocLeakAggregator(collector) : collector;
this.collector = args.nativemem && args.leak ? new MallocLeakAggregator(collector, args.tail) : collector;
}
public void convert() throws IOException {
jfr.stopAtNewChunk = true;
TimeIntervals timeIntervals = readLatencyTimeIntervals();
jfr.stopAtNewChunk = true;
while (jfr.hasMoreChunks()) {
// Reset method dictionary, since new chunk may have different IDs
methodNames = new Dictionary<>();
collector.beforeChunk();
collectEvents();
collectEvents(timeIntervals);
collector.afterChunk();
convertChunk();
@@ -51,15 +50,43 @@ public abstract class JfrConverter extends Classifier {
}
}
/**
 * Scans every chunk for MethodTrace events and collects the time intervals
 * (in nanoseconds) covered by traces lasting at least {@code args.latency} milliseconds.
 * The reader is rewound afterwards so that the recording can be read again.
 *
 * @return the collected intervals, or {@code null} when {@code args.latency} is negative
 * @throws IOException if reading the recording fails
 * @throws RuntimeException if the recording contains no MethodTrace events at all
 */
protected final TimeIntervals readLatencyTimeIntervals() throws IOException {
// A negative latency skips the scan entirely
if (args.latency < 0) return null;
TimeIntervals.Builder intervalsBuilder = new TimeIntervals.Builder();
boolean foundMethodTrace = false; // We'll throw an exception if none is found
jfr.stopAtNewChunk = true;
while (jfr.hasMoreChunks()) {
// Threshold is recomputed per chunk from the current ticksPerSec (milliseconds -> ticks)
long minLatencyTicks = args.latency * jfr.ticksPerSec / 1000;
MethodTrace event;
while ((event = jfr.readEvent(MethodTrace.class)) != null) {
foundMethodTrace = true;
if (event.duration >= minLatencyTicks) {
// Record [start, start + duration] of the slow invocation
intervalsBuilder.add(jfr.eventTimeToNanos(event.time), jfr.eventTimeToNanos(event.time + event.duration));
}
}
}
// Rewind before reporting a missing-events error, so the reader is reset either way
jfr.rewind();
if (!foundMethodTrace) {
throw new RuntimeException("No jdk.MethodTrace events found");
}
return intervalsBuilder.build();
}
protected EventCollector createCollector(Arguments args) {
return new EventAggregator(args.threads, args.grain);
}
protected void collectEvents() throws IOException {
protected void collectEvents(TimeIntervals timeIntervals) throws IOException {
// args.nativemem ? MallocEvent.class should always be first for the leak detection feature
Class<? extends Event> eventClass = args.nativemem ? MallocEvent.class
: args.nativelock ? NativeLockEvent.class
: args.live ? LiveObject.class
: args.alloc ? AllocationSample.class
: args.lock ? ContendedLock.class
: args.trace ? MethodTrace.class
: ExecutionSample.class;
BitSet threadStates = null;
@@ -72,6 +99,9 @@ public abstract class JfrConverter extends Classifier {
threadStates = getThreadStates(true);
} else if (args.wall) {
threadStates = getThreadStates(false);
} else if (args.cpuTime) {
threadStates = new BitSet();
threadStates.set(ExecutionSample.CPU_TIME_SAMPLE);
}
long startTicks = args.from != 0 ? toTicks(args.from) : Long.MIN_VALUE;
@@ -80,7 +110,9 @@ public abstract class JfrConverter extends Classifier {
for (Event event; (event = jfr.readEvent(eventClass)) != null; ) {
if (event.time >= startTicks && event.time <= endTicks) {
if (threadStates == null || threadStates.get(((ExecutionSample) event).threadState)) {
collector.collect(event);
if (timeIntervals == null || timeIntervals.contains(jfr.eventTimeToNanos(event.time))) {
collector.collect(event);
}
}
}
}
@@ -90,6 +122,49 @@ public abstract class JfrConverter extends Classifier {
// To be overridden in subclasses
}
/**
 * Decides whether a sample must be dropped according to the include/exclude patterns.
 * <p>
 * The names checked are, in order: the thread name (only when {@code args.threads} is set),
 * the class name (only when {@code classId} is non-zero), and the name of every frame
 * of the stack trace. Patterns are applied with {@code Matcher.matches()}.
 * <ul>
 * <li>Any name matching the exclude pattern drops the sample.</li>
 * <li>If an include pattern is set, at least one name must match it, otherwise the sample is dropped.</li>
 * </ul>
 *
 * @param stackId  ID of the stack trace, looked up in {@code jfr.stackTraces}
 * @param threadId ID of the thread the sample belongs to
 * @param classId  ID of the class associated with the sample, or 0 to skip the class check
 * @return {@code true} if the sample should be excluded
 */
protected boolean excludeStack(int stackId, int threadId, long classId) {
Pattern include = args.include;
Pattern exclude = args.exclude;
// No filters configured: keep everything
if (include == null && exclude == null) {
return false;
}
if (args.threads) {
String threadName = getThreadName(threadId);
if (exclude != null && exclude.matcher(threadName).matches()) {
return true;
}
if (include != null && include.matcher(threadName).matches()) {
// Included and nothing can exclude it: accept immediately
if (exclude == null) return false;
// Include requirement satisfied; keep scanning only for exclude matches
include = null;
}
}
if (classId != 0) {
String className = getClassName(classId);
if (exclude != null && exclude.matcher(className).matches()) {
return true;
}
if (include != null && include.matcher(className).matches()) {
if (exclude == null) return false;
include = null;
}
}
StackTrace stackTrace = jfr.stackTraces.get(stackId);
for (int i = 0; i < stackTrace.methods.length; i++) {
String name = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
if (exclude != null && exclude.matcher(name).matches()) {
return true;
}
if (include != null && include.matcher(name).matches()) {
if (exclude == null) return false;
include = null;
}
}
// A still non-null include pattern means it was requested but never matched
return include != null;
}
protected int toThreadState(String name) {
Map<Integer, String> threadStates = jfr.enums.get("jdk.types.ThreadState");
if (threadStates != null) {
@@ -260,10 +335,30 @@ public abstract class JfrConverter extends Classifier {
methodType == TYPE_KERNEL;
}
/**
 * Returns the name of the sample value type for the selected profiling mode:
 * "malloc" for nativemem, "allocations" for alloc/live, "locks" for lock,
 * and "cpu" for everything else.
 */
public String getValueType() {
    if (args.nativemem) {
        return "malloc";
    } else if (args.alloc || args.live) {
        return "allocations";
    } else if (args.lock) {
        return "locks";
    } else {
        return "cpu";
    }
}
/**
 * Returns the unit name used when values are sample counts; always "count".
 */
public String getSampleUnits() {
return "count";
}
/**
 * Returns the unit name used when values are totals rather than sample counts:
 * "bytes" for the memory modes (nativemem, alloc, live), "nanoseconds" otherwise.
 */
public String getTotalUnits() {
    boolean memoryProfile = args.nativemem || args.alloc || args.live;
    return memoryProfile ? "bytes" : "nanoseconds";
}
/**
 * Returns the multiplier applied to raw event values: {@code jfr.nanosPerTick}
 * for lock and nativelock profiles, and 1.0 for all other modes.
 */
public double counterFactor() {
    if (args.lock || args.nativelock) {
        return jfr.nanosPerTick;
    }
    return 1.0;
}
// Select sum(samples) or sum(value) depending on the --total option.
// For lock events, convert lock duration from ticks to nanoseconds.
// For lock and nativelock events, convert lock duration from ticks to nanoseconds.
protected abstract class AggregatedEventVisitor implements EventCollector.Visitor {
final double factor = !args.total ? 0.0 : args.lock ? 1e9 / jfr.ticksPerSec : 1.0;
private final double factor = !args.total ? 0.0 : counterFactor();
@Override
public final void visit(Event event, long samples, long value) {

View File

@@ -10,7 +10,6 @@ import one.jfr.StackTrace;
import one.jfr.event.AllocationSample;
import one.jfr.event.Event;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
@@ -73,19 +72,21 @@ public class JfrToFlame extends JfrConverter {
}
public void dump(OutputStream out) throws IOException {
try (PrintStream ps = new PrintStream(out, false, "UTF-8")) {
fg.dump(ps);
fg.dump(out);
}
/**
 * Reads the JFR file {@code input}, runs the conversion and returns the resulting
 * FlameGraph without writing it anywhere. The reader is closed before returning.
 *
 * @throws IOException if the file cannot be read
 */
public static FlameGraph parse(String input, Arguments args) throws IOException {
    try (JfrReader reader = new JfrReader(input)) {
        JfrToFlame flameConverter = new JfrToFlame(reader, args);
        flameConverter.convert();
        return flameConverter.fg;
    }
}
public static void convert(String input, String output, Arguments args) throws IOException {
JfrToFlame converter;
try (JfrReader jfr = new JfrReader(input)) {
converter = new JfrToFlame(jfr, args);
converter.convert();
}
try (FileOutputStream out = new FileOutputStream(output)) {
converter.dump(out);
FlameGraph fg = parse(input, args);
try (PrintStream out = new PrintStream(output, "UTF-8")) {
fg.dump(out);
}
}
}

View File

@@ -30,22 +30,37 @@ public class JfrToHeatmap extends JfrConverter {
@Override
protected EventCollector createCollector(Arguments args) {
return new EventCollector() {
long wallInterval;
/**
 * Returns the wall clock sampling interval, read lazily from the "wall" setting
 * of the recording, or from "interval" when "wall" is absent. The result is
 * cached in {@code wallInterval}; a cached value of 0 means "not yet read".
 * <p>
 * Falls back to 50_000_000 when the setting is zero or when neither setting is
 * present (previously a missing setting caused a NumberFormatException from
 * {@code Long.parseLong(null)}).
 */
private long getWallInterval() {
    if (wallInterval == 0) {
        String setting = jfr.settings.get("wall");
        if (setting == null) {
            setting = jfr.settings.get("interval");
        }
        long interval = setting != null ? Long.parseLong(setting) : 0;
        wallInterval = interval != 0 ? interval : 50_000_000;
    }
    return wallInterval;
}
@Override
public void collect(Event event) {
int extra = 0;
int classId = 0;
byte type = 0;
if (event instanceof AllocationSample) {
extra = ((AllocationSample) event).classId;
classId = ((AllocationSample) event).classId;
type = ((AllocationSample) event).tlabSize == 0 ? TYPE_KERNEL : TYPE_INLINED;
} else if (event instanceof ContendedLock) {
extra = ((ContendedLock) event).classId;
classId = ((ContendedLock) event).classId;
type = TYPE_KERNEL;
}
long msFromStart = (event.time - jfr.chunkStartTicks) * 1_000 / jfr.ticksPerSec;
long timeMs = jfr.chunkStartNanos / 1_000_000 + msFromStart;
heatmap.addEvent(event.stackTraceId, extra, type, timeMs);
long timeNs = jfr.eventTimeToNanos(event.time);
long samples = event.samples();
while (true) {
heatmap.addEvent(event.stackTraceId, event.tid, classId, type, timeNs / 1_000_000);
if (--samples <= 0) break;
// Only wall clock events can have samples > 1
timeNs += getWallInterval();
}
}
@Override
@@ -62,6 +77,7 @@ public class JfrToHeatmap extends JfrConverter {
@Override
public void afterChunk() {
jfr.stackTraces.clear();
wallInterval = 0;
}
@Override

View File

@@ -0,0 +1,334 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import static one.convert.OtlpConstants.*;
import one.jfr.Dictionary;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.*;
import one.proto.Proto;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;
/**
* Converts .jfr output to OpenTelemetry protocol.
*/
public class JfrToOtlp extends JfrConverter {
// Size in bytes to be allocated in the buffer to hold the varint containing the length of the message
private static final int MSG_LARGE = 5;
private static final int MSG_SMALL = 1;
private final Index<String> stringPool = new Index<>(String.class, "");
private final Index<String> functionPool = new Index<>(String.class, "");
private final Index<Line> linePool = new Index<>(Line.class, Line.EMPTY);
private final Index<KeyValue> attributesPool = new Index<>(KeyValue.class, KeyValue.EMPTY);
private final Index<IntArray> stacksPool = new Index<>(IntArray.class, IntArray.EMPTY);
private final int threadNameIndex = stringPool.index(OTLP_THREAD_NAME);
private final Dictionary<AggregatedEvent> aggregatedEvents = new Dictionary<>();
// Chunk-private cache to remember mappings from stacktrace ID to OTLP stack index
private final Map<Integer, Integer> stacksIndexCache = new HashMap<>();
private double chunkCounterFactor;
private final Proto proto = new Proto(1024);
/**
 * Creates a converter over the given JFR reader; both arguments are passed
 * unchanged to the {@code JfrConverter} constructor.
 */
public JfrToOtlp(JfrReader jfr, Arguments args) {
super(jfr, args);
}
/**
 * Writes the bytes accumulated so far in the protobuf buffer to {@code out}.
 * The stream is neither flushed nor closed here.
 *
 * @throws IOException if writing to {@code out} fails
 */
public void dump(OutputStream out) throws IOException {
out.write(proto.buffer(), 0, proto.size());
}
/**
 * Creates a collector that groups the events of the current chunk by
 * (thread ID, stack trace ID) into {@code aggregatedEvents}, keeping the
 * timestamp and value of every individual event.
 * <p>
 * The per-chunk state ({@code aggregatedEvents}, {@code stacksIndexCache}) is
 * cleared before each chunk and on finish. Iteration through {@code forEach}
 * is not supported.
 */
@Override
protected EventCollector createCollector(Arguments args) {
    return new EventCollector() {
        @Override
        public void beforeChunk() {
            chunkCounterFactor = counterFactor();
            aggregatedEvents.clear();
            stacksIndexCache.clear();
        }

        @Override
        public void collect(Event e) {
            if (excludeStack(e.stackTraceId, e.tid, 0)) {
                return;
            }

            // Pack tid into the high 32 bits and stackTraceId into the low 32 bits.
            // The mask prevents sign extension of a negative stackTraceId from
            // overwriting the tid half of the key; convertChunk() unpacks it
            // with plain int casts.
            long key = ((long) e.tid << 32) | (e.stackTraceId & 0xFFFFFFFFL);
            AggregatedEvent ec = aggregatedEvents.get(key);
            if (ec == null) {
                ec = new AggregatedEvent();
                aggregatedEvents.put(key, ec);
            }

            // Without --total the value is the sample count; otherwise it is the event
            // value, scaled by the chunk's counter factor when that factor is not 1.0
            long recordedValue = !args.total ? e.samples()
                    : chunkCounterFactor == 1.0 ? e.value()
                    : (long) (e.value() * chunkCounterFactor);
            ec.recordEvent(getUnixTimestampNanos(e.time), recordedValue);
        }

        // Converts a JFR tick timestamp to nanoseconds, relative to the chunk start time
        private long getUnixTimestampNanos(long jfrTimestamp) {
            long nanosFromStart = (long) ((jfrTimestamp - jfr.chunkStartTicks) * jfr.nanosPerTick);
            return jfr.chunkStartNanos + nanosFromStart;
        }

        @Override
        public void afterChunk() {}

        @Override
        public boolean finish() {
            aggregatedEvents.clear();
            stacksIndexCache.clear();
            return false;
        }

        @Override
        public void forEach(Visitor visitor) {
            throw new UnsupportedOperationException("Not supported");
        }
    };
}
/**
 * Writes the complete message: a resource_profiles field containing a
 * scope_profiles field that holds everything {@code super.convert()} emits,
 * followed by the shared profiles dictionary.
 *
 * @throws IOException if reading the recording fails
 */
@Override
public void convert() throws IOException {
// Open the two enclosing messages; their lengths are filled in by commitField()
long rpMark = proto.startField(PROFILES_DATA_resource_profiles, MSG_LARGE);
long spMark = proto.startField(RESOURCE_PROFILES_scope_profiles, MSG_LARGE);
super.convert();
// Close in reverse order of opening
proto.commitField(spMark);
proto.commitField(rpMark);
// The dictionary is written last, once all pools have been populated by the samples
writeProfileDictionary();
}
/**
 * Writes one profile message for the current chunk: the sample type
 * (value type and unit), the chunk start time and duration, and one sample
 * per aggregated (thread ID, stack trace ID) pair.
 */
@Override
protected void convertChunk() {
long pMark = proto.startField(SCOPE_PROFILES_profiles, MSG_LARGE);
long sttMark = proto.startField(PROFILE_sample_type, MSG_SMALL);
proto.field(VALUE_TYPE_type_strindex, stringPool.index(getValueType()));
// Unit depends on whether totals or sample counts are being reported
proto.field(VALUE_TYPE_unit_strindex,
stringPool.index(args.total ? getTotalUnits() : getSampleUnits()));
proto.commitField(sttMark);
proto.fieldFixed64(PROFILE_time_unix_nano, jfr.chunkStartNanos);
proto.field(PROFILE_duration_nanos, jfr.chunkDurationNanos());
aggregatedEvents.forEach((key, value) -> {
// Unpack the key built by the collector: low 32 bits = stack trace ID, high 32 bits = thread ID
int stackTraceId = (int) key;
int tid = (int) (key >> 32);
writeSample(stackTraceId, tid, value);
});
proto.commitField(pMark);
}
/**
 * Builds the stack for the given stack trace ID: each frame is converted to a
 * {@code Line}, registered in {@code linePool}, and its pool index is stored at
 * the same position as the frame.
 */
private IntArray makeStack(int stackTraceId) {
    StackTrace trace = jfr.stackTraces.get(stackTraceId);
    int depth = trace.methods.length;
    int[] lineIndices = new int[depth];
    for (int frame = 0; frame < depth; frame++) {
        Line line = makeLine(trace, frame);
        lineIndices[frame] = linePool.index(line);
    }
    return new IntArray(lineIndices);
}
/**
 * Creates the {@code Line} for frame {@code i} of {@code stackTrace}: the method
 * name is registered in {@code functionPool}, and the line number is taken from
 * the upper 16 bits of the frame's location value.
 */
private Line makeLine(StackTrace stackTrace, int i) {
    String name = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
    int line = stackTrace.locations[i] >>> 16;
    return new Line(functionPool.index(name), line);
}
/**
 * Writes one sample message: the stack index, the thread-name attribute index,
 * and the timestamps and values of all events aggregated in {@code ae}.
 *
 * @param stackTraceId JFR stack trace ID; mapped to a stack index through {@code stacksIndexCache}
 * @param tid          thread ID used to look up the thread name
 * @param ae           events recorded for this (thread, stack trace) pair
 */
private void writeSample(int stackTraceId, int tid, AggregatedEvent ae) {
// Upper bound of the sample message size, used to reserve room for its length prefix.
// 24 is the sum of:
// 4 tags: 1 byte
// 5 * 2: max size of thread name and stack idx
// 5 * 2: max size of timestamps/values arrays
int maxLengthBytes = varintSize(24 + ae.eventsCount * (8 /* fixed64 */ + 10 /* max varint */));
long sMark = proto.startField(PROFILE_samples, maxLengthBytes);
// Build the stack only the first time this stack trace ID is seen in the chunk
proto.field(SAMPLE_stack_index, stacksIndexCache.computeIfAbsent(stackTraceId, key -> stacksPool.index(makeStack(key))));
String threadName = getThreadName(tid);
KeyValue threadNameKv = new KeyValue(threadNameIndex, threadName);
proto.field(SAMPLE_attribute_indices, attributesPool.index(threadNameKv));
// Timestamps: 8 bytes each
long tMark = proto.startField(SAMPLE_timestamps_unix_nano, varintSize(8 * ae.eventsCount));
for (int i = 0; i < ae.eventsCount; ++i) {
proto.writeFixed64(ae.timestamps[i]);
}
proto.commitField(tMark);
// Values: up to 10 bytes each
long vMark = proto.startField(SAMPLE_values, varintSize(10 * ae.eventsCount));
for (int i = 0; i < ae.eventsCount; ++i) {
proto.writeLong(ae.values[i]);
}
proto.commitField(vMark);
proto.commitField(sMark);
}
/**
 * Returns the number of bytes needed to encode {@code value} as a varint
 * (7 payload bits per byte): 1 for values up to 127, and 10 for values with
 * the top bit set, including all negative values.
 */
private static int varintSize(long value) {
    // value | 1 makes zero count as one significant bit
    int leadingZeros = Long.numberOfLeadingZeros(value | 1);
    // ceil(significantBits / 7) with significantBits = 64 - leadingZeros
    return (70 - leadingZeros) / 7;
}
/**
 * Writes the dictionary message holding the tables that samples refer to by index:
 * mappings, functions, locations, stacks, strings and attributes. Each pool is
 * emitted in index order as returned by {@code Index.keys()}.
 */
private void writeProfileDictionary() {
long profilesDictionaryMark = proto.startField(PROFILES_DATA_dictionary, MSG_LARGE);
// Mapping[0] must be a default mapping according to the spec
long mMark = proto.startField(PROFILES_DICTIONARY_mapping_table, MSG_SMALL);
proto.commitField(mMark);
// Functions: only the name is written; this may add new entries to stringPool
for (String name : functionPool.keys()) {
long fMark = proto.startField(PROFILES_DICTIONARY_function_table, MSG_SMALL);
proto.field(FUNCTION_name_strindex, stringPool.index(name));
proto.commitField(fMark);
}
// Locations: each one refers to mapping 0 and carries a single line entry
for (Line line : linePool.keys()) {
long locMark = proto.startField(PROFILES_DICTIONARY_location_table, MSG_SMALL);
proto.field(LOCATION_mapping_index, 0);
long lineMark = proto.startField(LOCATION_line, MSG_SMALL);
proto.field(LINE_function_index, line.functionIdx);
proto.field(LINE_lines, line.lineNumber);
proto.commitField(lineMark);
proto.commitField(locMark);
}
// Stacks: location indices are written back to back inside one field
for (IntArray stack : stacksPool.keys()) {
long stackMark = proto.startField(PROFILES_DICTIONARY_stack_table, MSG_LARGE);
long locationIndicesMark = proto.startField(STACK_location_indices, MSG_LARGE);
for (int locationIdx : stack.array) {
proto.writeInt(locationIdx);
}
proto.commitField(locationIndicesMark);
proto.commitField(stackMark);
}
// Strings are written after the function table, which is the last place above that adds to stringPool
for (String s : stringPool.keys()) {
proto.field(PROFILES_DICTIONARY_string_table, s);
}
// Attributes: key as a string table index, value as an inline string
for (KeyValue kv : attributesPool.keys()) {
long aMark = proto.startField(PROFILES_DICTIONARY_attribute_table, MSG_LARGE);
proto.field(KEY_VALUE_AND_UNIT_key_strindex, kv.keyStrindex);
long vMark = proto.startField(KEY_VALUE_AND_UNIT_value, MSG_LARGE);
proto.field(ANY_VALUE_string_value, kv.value);
proto.commitField(vMark);
proto.commitField(aMark);
}
proto.commitField(profilesDictionaryMark);
}
/**
 * Converts the JFR file {@code input} and writes the result to the file {@code output}.
 * The input reader is closed before the output file is opened.
 *
 * @throws IOException if reading the input or writing the output fails
 */
public static void convert(String input, String output, Arguments args) throws IOException {
JfrToOtlp converter;
try (JfrReader jfr = new JfrReader(input)) {
converter = new JfrToOtlp(jfr, args);
converter.convert();
}
try (FileOutputStream out = new FileOutputStream(output)) {
converter.dump(out);
}
}
/**
 * Immutable (function index, line number) pair with value-based equality,
 * suitable for use as a key in an {@code Index}.
 */
private static final class Line {
    static final Line EMPTY = new Line(0, 0);

    final int functionIdx;
    final int lineNumber;

    Line(int functionIdx, int lineNumber) {
        this.functionIdx = functionIdx;
        this.lineNumber = lineNumber;
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof Line) {
            Line that = (Line) o;
            return that.functionIdx == functionIdx && that.lineNumber == lineNumber;
        }
        return false;
    }

    @Override
    public int hashCode() {
        return 31 * (31 * 17 + functionIdx) + lineNumber;
    }
}
/**
 * Immutable attribute made of a key (held as an int string index) and a
 * string value, with value-based equality.
 */
private static final class KeyValue {
    static final KeyValue EMPTY = new KeyValue(0, "");

    final int keyStrindex;
    // Only string value is fine for now
    final String value;

    KeyValue(int keyStrindex, String value) {
        this.keyStrindex = keyStrindex;
        this.value = value;
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof KeyValue) {
            KeyValue that = (KeyValue) o;
            return keyStrindex == that.keyStrindex && value.equals(that.value);
        }
        return false;
    }

    @Override
    public int hashCode() {
        return 31 * (31 * 17 + keyStrindex) + value.hashCode();
    }
}
/**
 * Wrapper that gives an {@code int[]} content-based equality and a hash code
 * computed once at construction, so it can be used as a hash map key.
 * The wrapped array is stored as is, without copying.
 */
private static final class IntArray {
    static final IntArray EMPTY = new IntArray(new int[0]);

    final int[] array;
    final int hash;

    IntArray(int[] array) {
        this.array = array;
        this.hash = Arrays.hashCode(array);
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof IntArray)) {
            return false;
        }
        IntArray that = (IntArray) o;
        return Arrays.equals(array, that.array);
    }

    @Override
    public int hashCode() {
        return hash;
    }
}
/**
 * Growable pair of parallel arrays holding the timestamp and value of each
 * recorded event. Only the first {@code eventsCount} entries are valid.
 */
private static final class AggregatedEvent {
    long[] timestamps = new long[1];
    long[] values = new long[1];
    int eventsCount = 0;

    /** Appends one event, doubling the capacity of both arrays when they are full. */
    public void recordEvent(long timestamp, long value) {
        int slot = eventsCount;
        if (slot == timestamps.length) {
            int doubled = 2 * timestamps.length;
            timestamps = Arrays.copyOf(timestamps, doubled);
            values = Arrays.copyOf(values, doubled);
        }
        timestamps[slot] = timestamp;
        values[slot] = value;
        eventsCount = slot + 1;
    }
}
}

View File

@@ -27,18 +27,7 @@ public class JfrToPprof extends JfrConverter {
public JfrToPprof(JfrReader jfr, Arguments args) {
super(jfr, args);
Proto sampleType;
if (args.nativemem) {
sampleType = valueType("malloc", args.total ? "bytes" : "count");
} else if (args.alloc || args.live) {
sampleType = valueType("allocations", args.total ? "bytes" : "count");
} else if (args.lock) {
sampleType = valueType("locks", args.total ? "nanoseconds" : "count");
} else {
sampleType = valueType("cpu", args.total ? "nanoseconds" : "count");
}
profile.field(1, sampleType)
profile.field(1, valueType(getValueType(), args.total ? getTotalUnits() : getSampleUnits()))
.field(13, strings.index("Produced by async-profiler"));
}
@@ -49,6 +38,9 @@ public class JfrToPprof extends JfrConverter {
@Override
public void visit(Event event, long value) {
if (excludeStack(event.stackTraceId, event.tid, event.classId())) {
return;
}
profile.field(2, sample(s, event, value));
s.reset();
}
@@ -80,7 +72,7 @@ public class JfrToPprof extends JfrConverter {
}
private Proto sample(Proto s, Event event, long value) {
int packedLocations = s.startField(1);
long packedLocations = s.startField(1, 3);
long classId = event.classId();
if (classId != 0) {

View File

@@ -3,10 +3,11 @@
* SPDX-License-Identifier: Apache-2.0
*/
import one.convert.*;
package one.convert;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class Main {
@@ -18,7 +19,7 @@ public class Main {
return;
}
if (args.files.size() == 1) {
if (args.files.size() == (args.diff ? 2 : 1)) {
args.files.add(".");
}
@@ -35,6 +36,34 @@ public class Main {
}
}
if (args.diff) {
if (fileCount != 2) {
throw new IllegalArgumentException("--diff option requires two input files");
}
if (!"html".equals(args.output) && !"collapsed".equals(args.output)) {
throw new IllegalArgumentException("--diff option requires html or collapsed output format");
}
args.norm = true; // don't let random IDs in class names spoil comparison
String input1 = args.files.get(0);
String input2 = args.files.get(1);
String output = isDirectory ? new File(lastFile, replaceExt(input2, "diff." + args.output)).getPath() : lastFile;
System.out.print("Converting " + getFileName(input2) + " vs " + getFileName(input1) + " -> " + getFileName(output) + " ");
System.out.flush();
long startTime = System.nanoTime();
FlameGraph base = parseFlameGraph(input1, args);
FlameGraph current = parseFlameGraph(input2, args);
current.diff(base);
current.dump(new FileOutputStream(output));
long endTime = System.nanoTime();
System.out.print("# " + (endTime - startTime) / 1000000 / 1000.0 + " s\n");
return;
}
for (int i = 0; i < fileCount; i++) {
String input = args.files.get(i);
String output = isDirectory ? new File(lastFile, replaceExt(input, args.output)).getPath() : lastFile;
@@ -58,6 +87,8 @@ public class Main {
JfrToPprof.convert(input, output, args);
} else if ("heatmap".equals(args.output)) {
JfrToHeatmap.convert(input, output, args);
} else if ("otlp".equals(args.output)) {
JfrToOtlp.convert(input, output, args);
} else {
throw new IllegalArgumentException("Unrecognized output format: " + args.output);
}
@@ -66,6 +97,14 @@ public class Main {
}
}
/**
 * Builds a FlameGraph from {@code input}, using the JFR parser when
 * {@code isJfr(input)} is true and the FlameGraph parser otherwise.
 *
 * @throws IOException if the input cannot be read
 */
public static FlameGraph parseFlameGraph(String input, Arguments args) throws IOException {
    return isJfr(input)
            ? JfrToFlame.parse(input, args)
            : FlameGraph.parse(input, args);
}
/**
 * Returns the part of {@code fileName} after the last platform file separator,
 * or the whole string when it contains no separator.
 */
private static String getFileName(String fileName) {
    int lastSeparator = fileName.lastIndexOf(File.separatorChar);
    return fileName.substring(lastSeparator + 1);
}
@@ -93,16 +132,23 @@ public class Main {
System.out.print("Usage: jfrconv [options] <input> [<input>...] <output>\n" +
"\n" +
"Conversion options:\n" +
" -o --output FORMAT Output format: html, collapsed, pprof, pb.gz, heatmap\n" +
" -o --output FORMAT Output format: html, collapsed, pprof, pb.gz, heatmap, otlp\n" +
" -I --include REGEX Include only stacks with the specified frames\n" +
" -X --exclude REGEX Exclude stacks with the specified frames\n" +
" --diff Create differential Flame Graph from two input files\n" +
"\n" +
"JFR options:\n" +
" --cpu CPU profile\n" +
" --cpu CPU profile (ExecutionSample)\n" +
" --cpu-time CPU profile (CPUTimeSample)\n" +
" --wall Wall clock profile\n" +
" --alloc Allocation profile\n" +
" --live Live object profile\n" +
" --nativemem malloc profile\n" +
" --leak Only include memory leaks in nativemem\n" +
" --tail RATIO Ignore tail allocations for leak profiling (10% by default)\n" +
" --lock Lock contention profile\n" +
" --nativelock Native (pthread) lock contention profile\n" +
" --trace Method traces / latency profile\n" +
" -t --threads Split stack traces by threads\n" +
" -s --state LIST Filter thread states: runnable, sleeping\n" +
" --classify Classify samples into predefined categories\n" +
@@ -114,6 +160,7 @@ public class Main {
" --dot Dotted class names\n" +
" --from TIME Start time in ms (absolute or relative)\n" +
" --to TIME End time in ms (absolute or relative)\n" +
" --latency MS Retain only samples within MethodTraces of at least MS milliseconds\n" +
"\n" +
"Flame Graph options:\n" +
" --title STRING Flame Graph title\n" +
@@ -123,8 +170,6 @@ public class Main {
" -r --reverse Reverse stack traces (defaults to icicle graph)\n" +
" -i --inverted Toggles the layout for reversed stacktraces from icicle to flamegraph\n" +
" and for default stacktraces from flamegraph to icicle\n" +
" -I --include REGEX Include only stacks with the specified frames\n" +
" -X --exclude REGEX Exclude stacks with the specified frames\n" +
" --highlight REGEX Highlight frames matching the given pattern\n");
}
}

View File

@@ -0,0 +1,63 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
/**
 * Constants used by the OTLP converter: the thread name attribute key and the
 * field numbers passed to the protobuf writer, named {@code MESSAGE_field}.
 */
final class OtlpConstants {

    static final String OTLP_THREAD_NAME = "thread.name";

    // ProfilesDictionary
    static final int PROFILES_DICTIONARY_mapping_table = 1;
    static final int PROFILES_DICTIONARY_location_table = 2;
    static final int PROFILES_DICTIONARY_function_table = 3;
    static final int PROFILES_DICTIONARY_string_table = 5;
    static final int PROFILES_DICTIONARY_attribute_table = 6;
    static final int PROFILES_DICTIONARY_stack_table = 7;

    // ProfilesData
    static final int PROFILES_DATA_resource_profiles = 1;
    static final int PROFILES_DATA_dictionary = 2;

    // ResourceProfiles
    static final int RESOURCE_PROFILES_scope_profiles = 2;

    // ScopeProfiles
    static final int SCOPE_PROFILES_profiles = 2;

    // Profile
    static final int PROFILE_sample_type = 1;
    static final int PROFILE_samples = 2;
    static final int PROFILE_time_unix_nano = 3;
    static final int PROFILE_duration_nanos = 4;

    // ValueType
    static final int VALUE_TYPE_type_strindex = 1;
    static final int VALUE_TYPE_unit_strindex = 2;
    static final int VALUE_TYPE_aggregation_temporality = 3;

    // Sample
    static final int SAMPLE_stack_index = 1;
    static final int SAMPLE_values = 2;
    static final int SAMPLE_attribute_indices = 3;
    static final int SAMPLE_timestamps_unix_nano = 5;

    // Stack
    static final int STACK_location_indices = 1;

    // Location
    static final int LOCATION_mapping_index = 1;
    static final int LOCATION_line = 3;

    // Line
    static final int LINE_function_index = 1;
    static final int LINE_lines = 2;

    // Function
    static final int FUNCTION_name_strindex = 1;

    // KeyValueAndUnit
    static final int KEY_VALUE_AND_UNIT_key_strindex = 1;
    static final int KEY_VALUE_AND_UNIT_value = 2;

    // AnyValue
    static final int ANY_VALUE_string_value = 1;
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.convert;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.Arrays;
/**
 * Immutable set of closed time intervals that answers "is this instant inside
 * any interval?" with a binary search. Instances are created through
 * {@link Builder}, which merges overlapping or touching intervals as they are added.
 */
public final class TimeIntervals {
    // Parallel arrays: interval i is [startIntervals[i], endIntervals[i]], sorted by start
    private final long[] startIntervals;
    private final long[] endIntervals;

    private TimeIntervals(long[] startIntervals, long[] endIntervals) {
        this.startIntervals = startIntervals;
        this.endIntervals = endIntervals;
    }

    /**
     * Returns {@code true} if {@code instant} lies within one of the intervals,
     * both bounds inclusive.
     */
    public boolean contains(long instant) {
        int pos = Arrays.binarySearch(startIntervals, instant);
        if (pos >= 0) {
            // instant is exactly the start of an interval
            return true;
        }
        // -pos - 1 is the insertion point; the interval just before it is the only candidate
        int candidate = -pos - 2;
        return candidate >= 0 && instant <= endIntervals[candidate];
    }

    /**
     * Accumulates intervals, keeping them non-overlapping at all times.
     */
    public static final class Builder {
        // Start -> end; no overlapping intervals
        private final TreeMap<Long, Long> timeIntervals = new TreeMap<>();

        /**
         * Adds the closed interval [startInstant, endInstant], merging it with
         * every existing interval it overlaps or touches.
         *
         * @throws IllegalArgumentException if {@code startInstant} is after {@code endInstant}
         */
        public void add(long startInstant, long endInstant) {
            if (startInstant > endInstant) {
                throw new IllegalArgumentException("'startInstant' should not be after 'endInstant'");
            }

            // Existing intervals that start inside the new one get absorbed; the last
            // of them may extend the end
            NavigableMap<Long, Long> absorbed = timeIntervals.subMap(startInstant, true, endInstant, true);
            Map.Entry<Long, Long> rightmost = absorbed.pollLastEntry();
            if (rightmost != null) {
                endInstant = Math.max(rightmost.getValue(), endInstant);
            }
            absorbed.clear();

            // The closest interval starting at or before startInstant may reach into the new one
            Map.Entry<Long, Long> predecessor = timeIntervals.floorEntry(startInstant);
            if (predecessor != null) {
                long predecessorEnd = predecessor.getValue();
                if (predecessorEnd >= startInstant) {
                    timeIntervals.remove(predecessor.getKey());
                    startInstant = predecessor.getKey();
                    endInstant = Math.max(endInstant, predecessorEnd);
                }
            }

            timeIntervals.put(startInstant, endInstant);
        }

        /** Creates a {@code TimeIntervals} holding the intervals added so far. */
        public TimeIntervals build() {
            int count = timeIntervals.size();
            long[] starts = new long[count];
            long[] ends = new long[count];
            int i = 0;
            for (Map.Entry<Long, Long> interval : timeIntervals.entrySet()) {
                starts[i] = interval.getKey();
                ends[i] = interval.getValue();
                i++;
            }
            return new TimeIntervals(starts, ends);
        }
    }
}

View File

@@ -7,13 +7,10 @@ package one.heatmap;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.*;
import java.util.regex.Pattern;
import one.convert.Arguments;
import one.convert.Index;
import one.convert.JfrConverter;
import one.convert.ResourceProcessor;
import one.convert.*;
import one.jfr.DictionaryInt;
public class Heatmap {
@@ -28,11 +25,11 @@ public class Heatmap {
public Heatmap(Arguments args, JfrConverter converter) {
this.args = args;
this.state = new State(converter, BLOCK_DURATION_MS);
this.state = new State(converter, args, BLOCK_DURATION_MS);
}
public void addEvent(int stackTraceId, int extra, byte type, long timeMs) {
state.addEvent(stackTraceId, extra, type, timeMs);
public void addEvent(int stackTraceId, int threadId, int classId, byte type, long timeMs) {
state.addEvent(stackTraceId, threadId, classId, type, timeMs);
}
public void addStack(long id, long[] methods, int[] locations, byte[] types, int size) {
@@ -40,13 +37,15 @@ public class Heatmap {
}
public void beforeChunk() {
state.methodsCache.clear();
state.methodCache.clear();
state.includeCache.clear();
}
public void finish(long startMs) {
this.startMs = startMs;
state.methodsCache.clear();
state.methodCache.clear();
state.stackTracesCache.clear();
state.includeCache.clear();
}
private EvaluationContext evaluate() {
@@ -54,9 +53,9 @@ public class Heatmap {
this.state = null;
return new EvaluationContext(
state.sampleList.samples(),
state.methodsCache.methodsIndex(),
state.methods,
state.stackTracesRemap.orderedTraces(),
state.methodsCache.orderedSymbolTable()
state.symbolTable.keys()
);
}
@@ -95,7 +94,7 @@ public class Heatmap {
stream.print('E');
tail = ResourceProcessor.printTill(stream, tail, "/*title:*/");
stream.print(args.title == null ? "Heatmap" : args.title);
stream.print(args.title != null ? args.title : "Heatmap");
tail = ResourceProcessor.printTill(stream, tail, "/*startMs:*/0");
stream.print(startMs);
@@ -194,7 +193,7 @@ public class Heatmap {
int methodId = context.nodeTree.extractMethodId(d);
out.writeVar(synonymTable.nodeIdOrSynonym(parentId));
out.writeVar(context.orderedMethods[methodId].frequencyOrNewMethodId);
out.writeVar(context.orderedMethods[methodId].frequencyBasedId);
}
}
@@ -215,26 +214,24 @@ public class Heatmap {
out.writeVar(startsCount);
for (Method method : context.orderedMethods) {
if (method.start) {
out.writeVar(method.frequencyOrNewMethodId);
out.writeVar(method.frequencyBasedId);
}
}
}
private void renameMethodsByFrequency(EvaluationContext context) {
Arrays.sort(context.orderedMethods, new Comparator<Method>() {
Method[] methodsByFrequency = context.orderedMethods.clone();
Arrays.sort(methodsByFrequency, new Comparator<Method>() {
@Override
public int compare(Method o1, Method o2) {
return Integer.compare(o2.frequencyOrNewMethodId, o1.frequencyOrNewMethodId);
return Integer.compare(o2.frequency, o1.frequency);
}
});
for (int i = 0; i < context.orderedMethods.length; i++) {
Method method = context.orderedMethods[i];
method.frequencyOrNewMethodId = i + 1; // zero is reserved for no method
for (int i = 0; i < methodsByFrequency.length; i++) {
Method method = methodsByFrequency[i];
method.frequencyBasedId = i + 1; // zero is reserved for no method
}
// restores order
context.methods.keys(context.orderedMethods);
}
private int[] buildLz78TreeAndPrepareData(EvaluationContext context) {
@@ -263,7 +260,7 @@ public class Heatmap {
for (int methodId : stack) {
current = context.nodeTree.appendChild(current, methodId);
if (current == 0) { // so we are starting from root again, it will be written to output as Lz78 element - [parent node id; method id]
context.orderedMethods[methodId].frequencyOrNewMethodId++;
context.orderedMethods[methodId].frequency++;
if (stackBuffer.length == chunksIterator) {
stackBuffer = Arrays.copyOf(stackBuffer, chunksIterator + chunksIterator / 2);
}
@@ -288,7 +285,7 @@ public class Heatmap {
for (int methodId : stack) {
current = context.nodeTree.appendChild(current, methodId);
if (current == 0) { // so we are starting from root again, it will be written to output as Lz78 element - [parent node id; method id]
context.orderedMethods[methodId].frequencyOrNewMethodId++;
context.orderedMethods[methodId].frequency++;
}
}
}
@@ -351,7 +348,7 @@ public class Heatmap {
Arrays.sort(evaluationContext.orderedMethods, new Comparator<Method>() {
@Override
public int compare(Method o1, Method o2) {
return Integer.compare(o1.frequencyOrNewMethodId, o2.frequencyOrNewMethodId);
return Integer.compare(o1.frequencyBasedId, o2.frequencyBasedId);
}
});
out.nextByte('A');
@@ -372,7 +369,6 @@ public class Heatmap {
}
private static class EvaluationContext {
final Index<Method> methods;
final Method[] orderedMethods;
final int[][] stackTraces;
final String[] symbols;
@@ -383,10 +379,8 @@ public class Heatmap {
EvaluationContext(SampleList.Result sampleList, Index<Method> methods, int[][] stackTraces, String[] symbols) {
this.sampleList = sampleList;
this.methods = methods;
this.stackTraces = stackTraces;
this.symbols = symbols;
orderedMethods = methods.keys();
}
}
@@ -395,48 +389,105 @@ public class Heatmap {
private static final int LIMIT = Integer.MAX_VALUE;
final JfrConverter converter;
final Arguments args;
final SampleList sampleList;
final StackStorage stackTracesRemap = new StackStorage();
// Maps stack trace ID to prototype ID in stackTracesRemap
final DictionaryInt stackTracesCache = new DictionaryInt();
final MethodCache methodsCache;
final Map<MethodKey, Integer> methodCache = new HashMap<>();
final BidirectionalIndex<Method> methods = new BidirectionalIndex<>(Method.class, Method.EMPTY);
final BidirectionalIndex<String> symbolTable = new BidirectionalIndex<>(String.class, "");
// Cache for exclude/include filter results per prototype ID
final Map<Integer, Boolean> includeCache = new HashMap<>();
// Reusable array for temporarily storing a (potentially) new stack trace
int[] cachedStackTrace = new int[4096];
State(JfrConverter converter, long blockDurationMs) {
sampleList = new SampleList(blockDurationMs);
methodsCache = new MethodCache(converter);
State(JfrConverter converter, Arguments args, long blockDurationMs) {
this.converter = converter;
this.args = args;
this.sampleList = new SampleList(blockDurationMs);
}
public void addEvent(int stackTraceId, int extra, byte type, long timeMs) {
if (sampleList.getRecordsCount() >= LIMIT) {
return;
private String resolveFrameName(Method method) {
if (method.className == 0) {
return symbolTable.getKey(method.methodName);
}
if (extra == 0) {
sampleList.add(stackTracesCache.get(stackTraceId), timeMs);
return;
if (method.methodName == 0) {
return symbolTable.getKey(method.className);
}
return symbolTable.getKey(method.className) + '.' + symbolTable.getKey(method.methodName);
}
int id = stackTracesCache.get((long) extra << 32 | stackTraceId, -1);
if (id != -1) {
sampleList.add(id, timeMs);
/**
 * Tells whether samples carrying the given stack should be recorded.
 * When neither an include nor an exclude pattern is configured every stack is accepted;
 * otherwise the filter verdict is computed once per stack ID and memoized in includeCache.
 *
 * @param prototypeId ID of the stack in stackTracesRemap
 * @return true if the stack passes the include/exclude filters
 */
private boolean includeStack(int prototypeId) {
    boolean filteringEnabled = args.include != null || args.exclude != null;
    if (!filteringEnabled) {
        return true;
    }
    return includeCache.computeIfAbsent(prototypeId, this::applyIncludeExcludeFilter);
}
/**
 * Evaluates the include/exclude patterns against every frame of a stack.
 * A frame whose name fully matches the exclude pattern rejects the stack.
 * Otherwise the stack is accepted when no include pattern is configured,
 * or when at least one frame fully matches it.
 *
 * @param stackId ID of the stack in stackTracesRemap
 * @return true if the stack should be included
 */
private boolean applyIncludeExcludeFilter(int stackId) {
    final Pattern exclude = args.exclude;
    // Reset to null as soon as some frame has satisfied the include pattern
    Pattern pendingInclude = args.include;

    for (int methodIndex : stackTracesRemap.get(stackId)) {
        String frameName = resolveFrameName(methods.getKey(methodIndex));
        if (exclude != null && exclude.matcher(frameName).matches()) {
            return false;
        }
        if (pendingInclude != null && pendingInclude.matcher(frameName).matches()) {
            if (exclude == null) {
                // Nothing can reject the stack anymore
                return true;
            }
            pendingInclude = null;
        }
    }
    return pendingInclude == null;
}
public void addEvent(int stackTraceId, int threadId, int classId, byte type, long timeMs) {
if (sampleList.getRecordsCount() >= LIMIT || stackTraceId == 0) {
return;
}
int prototypeId = stackTracesCache.get(stackTraceId);
if (classId == 0 && !args.threads) {
if (includeStack(prototypeId)) {
sampleList.add(prototypeId, timeMs);
}
return;
}
int[] prototype = stackTracesRemap.get(prototypeId);
int stackSize = prototype.length + (args.threads ? 1 : 0) + (classId != 0 ? 1 : 0);
if (cachedStackTrace.length < stackSize) {
cachedStackTrace = new int[stackSize * 2];
}
id = stackTracesRemap.indexWithPrototype(prototype, methodsCache.indexForClass(extra, type));
stackTracesCache.put((long) extra << 32 | stackTraceId, id);
if (args.threads) {
MethodKey key = new MethodKey(MethodKeyType.THREAD, threadId, -1, Frame.TYPE_NATIVE, true);
cachedStackTrace[0] = getMethodIndex(key);
}
sampleList.add(id, timeMs);
System.arraycopy(prototype, 0, cachedStackTrace, args.threads ? 1 : 0, prototype.length);
if (classId != 0) {
MethodKey key = new MethodKey(MethodKeyType.CLASS, classId, -1, type, false);
cachedStackTrace[stackSize - 1] = getMethodIndex(key);
}
int newStackId = stackTracesRemap.index(cachedStackTrace, stackSize);
if (includeStack(newStackId)) {
sampleList.add(newStackId, timeMs);
}
}
public void addStack(long id, long[] methods, int[] locations, byte[] types, int size) {
int[] stackTrace = cachedStackTrace;
if (stackTrace.length < size) {
cachedStackTrace = stackTrace = new int[size * 2];
if (cachedStackTrace.length < size) {
cachedStackTrace = new int[size * 2];
}
for (int i = size - 1; i >= 0; i--) {
@@ -447,12 +498,89 @@ public class Heatmap {
int index = size - 1 - i;
boolean firstMethodInTrace = index == 0;
stackTrace[index] = methodsCache.index(methodId, location, type, firstMethodInTrace);
// When args.threads is true, the first frame is the artificial thread frame
boolean firstFrameInStack = firstMethodInTrace && !args.threads;
MethodKey key = new MethodKey(MethodKeyType.METHOD, methodId, location, type, firstFrameInStack);
cachedStackTrace[index] = getMethodIndex(key);
}
stackTracesCache.put(id, stackTracesRemap.index(stackTrace, size));
stackTracesCache.put(id, stackTracesRemap.index(cachedStackTrace, size));
}
/**
 * Returns the index of the Method described by the key in the methods index.
 * The Method is built and registered on first use; later lookups are served from methodCache.
 */
private int getMethodIndex(MethodKey key) {
    Integer cached = methodCache.get(key);
    if (cached == null) {
        int created = methods.index(key.makeMethod(converter, symbolTable));
        methodCache.put(key, created);
        return created;
    }
    return cached;
}
/**
 * Immutable hash-map key describing a frame to be registered in the method index.
 * Two keys are equal when their kind, ID and packed metadata (location, type,
 * firstInStack) are all equal.
 */
private static final class MethodKey {
    // Interpreted according to keyType: passed to the converter as a method ID,
    // a thread ID or a class ID (see makeMethod)
    private final long methodId;

    // Packed frame attributes:
    // bits 0-31: location
    // bits 32-39: type
    // bit 40: firstInStack
    private final long metadata;

    // Used to infer what type of method to create
    private final MethodKeyType keyType;

    public MethodKey(MethodKeyType keyType, long methodId, int location, byte type, boolean firstInStack) {
        this.keyType = keyType;
        this.methodId = methodId;
        // Mask location and type so that sign extension does not spill into neighboring bit fields
        this.metadata = (long) (firstInStack ? 1 : 0) << 40 | (type & 0xffL) << 32 | (location & 0xFFFFFFFFL);
    }

    // Low 32 bits of metadata
    public int getLocation() {
        return (int) metadata;
    }

    // Bits 32-39 of metadata
    public byte getType() {
        return (byte) (metadata >> 32);
    }

    // Bit 40 of metadata
    public boolean getFirstInStack() {
        return ((metadata >> 40) & 1L) != 0;
    }

    /**
     * Creates the Method this key stands for, interning the required names in symbolTable.
     * METHOD keys resolve class and method name through the converter, THREAD keys carry
     * only a thread name (className is 0), CLASS keys carry only a class name (methodName is 0).
     *
     * @throws IllegalArgumentException if keyType is not one of the known kinds
     * @throws ArithmeticException for a THREAD key whose ID does not fit in an int
     */
    public Method makeMethod(JfrConverter converter, Index<String> symbolTable) {
        switch (keyType) {
            case METHOD:
                StackTraceElement ste = converter.getStackTraceElement(methodId, getType(), getLocation());
                int className = symbolTable.index(ste.getClassName());
                int methodName = symbolTable.index(ste.getMethodName());
                return new Method(className, methodName, getLocation(), getType(), getFirstInStack());
            case THREAD:
                String threadName = converter.getThreadName(Math.toIntExact(methodId));
                return new Method(0, symbolTable.index(threadName), getLocation(), getType(), getFirstInStack());
            case CLASS:
                String javaClassName = converter.getClassName(methodId);
                return new Method(symbolTable.index(javaClassName), 0, getLocation(), getType(), getFirstInStack());
            default:
                throw new IllegalArgumentException("Unexpected keyType: " + keyType);
        }
    }

    @Override
    public boolean equals(Object other) {
        if (!(other instanceof MethodKey)) return false;
        MethodKey methodKey = (MethodKey) other;
        return methodId == methodKey.methodId && metadata == methodKey.metadata && keyType == methodKey.keyType;
    }

    // Combines the same three fields that equals() compares
    @Override
    public int hashCode() {
        return 31 * (31 * Long.hashCode(methodId) + Long.hashCode(metadata)) + keyType.hashCode();
    }
}
// Kind of frame a MethodKey describes; selects how MethodKey.makeMethod resolves names:
// METHOD - a regular stack frame, THREAD - an artificial thread-name frame,
// CLASS - an artificial class-name frame
private enum MethodKeyType {
    METHOD, THREAD, CLASS
}
}
}

View File

@@ -9,25 +9,20 @@ import one.convert.Frame;
public class Method {
public static final Method EMPTY = new Method(0, 0, -1, (byte) 0, false);
public final int className;
public final int methodName;
public final int location;
public final byte type;
public final boolean start;
final long originalMethodId;
Method next;
public int frequencyOrNewMethodId;
public int frequency;
// An identifier based on frequency ordering, more frequent methods will get a lower ID
public int frequencyBasedId;
public int index;
Method(int className, int methodName) {
this(0, className, methodName, 0, Frame.TYPE_NATIVE, true);
}
Method(long originalMethodId, int className, int methodName, int location, byte type, boolean start) {
this.originalMethodId = originalMethodId;
Method(int className, int methodName, int location, byte type, boolean start) {
this.className = className;
this.methodName = methodName;
this.location = location;

View File

@@ -1,114 +0,0 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.heatmap;
import java.util.Arrays;
import one.convert.Index;
import one.convert.JfrConverter;
import one.jfr.Dictionary;
/**
 * Maps JFR method IDs (together with location, frame type and first-in-stack flag)
 * to indices of Method objects, creating the Method objects and interning their
 * class/method names on demand.
 *
 * Methods sharing the same original method ID but differing in location, type or
 * start flag are chained through Method.next.
 */
public class MethodCache {
    private final JfrConverter converter;
    private final Index<String> symbolTable = new Index<>(String.class, "", 32768);
    // The index is seeded with a synthetic method whose class name is "all"
    private final Index<Method> methodIndex = new Index<>(Method.class, new Method(symbolTable.index("all"), 0), 32768);

    // Direct lookup table for method IDs below 65536
    private final Method[] nearCache = new Method[256 * 256];
    // It should be better to create dictionary with linked methods instead of open addressed hash table
    // but in most cases all methods should fit nearCache, so less code is better
    private final Dictionary<Method> farMethods = new Dictionary<>(1024);

    public MethodCache(JfrConverter converter) {
        this.converter = converter;
    }

    // Drops the ID-to-Method lookup structures; symbolTable and methodIndex are left intact
    public void clear() {
        Arrays.fill(nearCache, null);
        farMethods.clear();
    }

    /**
     * Returns the index of the Method matching the given ID, location, type and
     * first-in-stack flag, registering a new Method if no exact match exists yet.
     */
    public int index(long methodId, int location, byte type, boolean firstInStack) {
        Method method;
        // NOTE(review): a negative methodId would also take this branch and produce
        // a negative array index - presumably IDs are never negative; confirm
        if (methodId < nearCache.length) {
            int mid = (int) methodId;
            method = nearCache[mid];
            if (method == null) {
                method = createMethod(methodId, location, type, firstInStack);
                nearCache[mid] = method;
                return method.index = methodIndex.index(method);
            }
        } else {
            // this should be extremely rare case
            method = farMethods.get(methodId);
            if (method == null) {
                method = createMethod(methodId, location, type, firstInStack);
                farMethods.put(methodId, method);
                return method.index = methodIndex.index(method);
            }
        }

        // Walk the chain looking for an exact match; remember any entry with the same
        // method ID so that its already resolved names can be reused
        Method last = null;
        Method prototype = null;
        while (method != null) {
            if (method.originalMethodId == methodId) {
                if (method.location == location && method.type == type && method.start == firstInStack) {
                    return method.index;
                }
                prototype = method;
            }
            last = method;
            method = method.next;
        }

        if (prototype != null) {
            // Same method, different location/type/start: copy names without asking the converter
            last.next = method = new Method(methodId, prototype.className, prototype.methodName, location, type, firstInStack);
            return method.index = methodIndex.index(method);
        }

        last.next = method = createMethod(methodId, location, type, firstInStack);
        return method.index = methodIndex.index(method);
    }

    /**
     * Returns the index of the artificial class frame for the given class ID and frame type,
     * creating it on first use. Such frames have no method name, location -1 and start == false.
     */
    public int indexForClass(int extra, byte type) {
        // Synthetic key: class ID in the upper half plus the top bit set
        long methodId = (long) extra << 32 | 1L << 63;
        Method method = farMethods.get(methodId);
        Method last = null;
        while (method != null) {
            if (method.originalMethodId == methodId) {
                if (method.location == -1 && method.type == type && !method.start) {
                    return method.index;
                }
            }
            last = method;
            method = method.next;
        }

        String javaClassName = converter.getClassName(extra);
        method = new Method(methodId, symbolTable.index(javaClassName), 0, -1, type, false);
        if (last == null) {
            farMethods.put(methodId, method);
        } else {
            last.next = method;
        }
        return method.index = methodIndex.index(method);
    }

    // Resolves class and method names through the converter and interns them in symbolTable
    private Method createMethod(long methodId, int location, byte type, boolean firstInStack) {
        StackTraceElement ste = converter.getStackTraceElement(methodId, type, location);
        int className = symbolTable.index(ste.getClassName());
        int methodName = symbolTable.index(ste.getMethodName());
        return new Method(methodId, className, methodName, location, type, firstInStack);
    }

    // Returns the keys of the symbol index
    public String[] orderedSymbolTable() {
        return symbolTable.keys();
    }

    public Index<Method> methodsIndex() {
        return methodIndex;
    }
}

View File

@@ -67,7 +67,7 @@ public class SampleList {
}
currentBlockSize++;
stackIds[stackIdsPos++] = (int) (currentData & 0xFFFFFFFFL) - 1;
stackIds[stackIdsPos++] = (int) currentData - 1;
}
if (currentBlockId <= lastBlockId) {

View File

@@ -7,9 +7,9 @@ package one.heatmap;
import java.util.Arrays;
public class StackStorage {
public final class StackStorage {
protected static final int INITIAL_CAPACITY = 16 * 1024;
private static final int INITIAL_CAPACITY = 16 * 1024;
private int size;
@@ -32,65 +32,22 @@ public class StackStorage {
return values[id - 1];
}
public int index(int[] stack, int stackSize) {
public int index(int[] input, int inputSize) {
int mask = meta.length - 1;
int hashCode = murmur(stack, stackSize);
int i = hashCode & mask;
while (true) {
long currentMeta = meta[i];
if (currentMeta == 0) {
break;
}
int hash = (int) currentMeta;
if (hash == hashCode) {
int targetHash = murmur(input, inputSize);
int i = targetHash & mask;
for (long currentMeta = meta[i]; currentMeta != 0; currentMeta = meta[i]) {
if ((int) currentMeta == targetHash) {
int index = (int) (currentMeta >>> 32);
int[] value = values[index];
if (equals(value, stack, stackSize)) {
if (equals(input, inputSize, values[index])) {
return index + 1;
}
}
i = (i + 1) & mask;
}
values[size] = Arrays.copyOf(stack, stackSize);
meta[i] = (long) size << 32 | (hashCode & 0xFFFFFFFFL);
size++;
if (size * 2 > values.length) {
resize(values.length * 2);
}
return size;
}
public int indexWithPrototype(int[] prototype, int append) {
int mask = meta.length - 1;
int hashCode = murmurWithExtra(prototype, append);
int i = hashCode & mask;
while (true) {
long currentMeta = meta[i];
if (currentMeta == 0) {
break;
}
int hash = (int) currentMeta;
if (hash == hashCode) {
int index = (int) (currentMeta >>> 32);
int[] value = values[index - 1];
if (equalsWithExtra(value, prototype, append)) {
return index;
}
}
i = (i + 1) & mask;
}
int[] stack = Arrays.copyOf(prototype, prototype.length + 1);
stack[prototype.length] = append;
values[size] = stack;
meta[i] = (long) size << 32 | (hashCode & 0xFFFFFFFFL);
values[size] = Arrays.copyOf(input, inputSize);
meta[i] = (long) size << 32 | (targetHash & 0xFFFFFFFFL);
size++;
if (size * 2 > values.length) {
@@ -104,7 +61,7 @@ public class StackStorage {
return Arrays.copyOf(values, size);
}
protected void resize(int newCapacity) {
private void resize(int newCapacity) {
long[] newMeta = new long[newCapacity * 2];
int mask = newMeta.length - 1;
@@ -124,33 +81,17 @@ public class StackStorage {
values = Arrays.copyOf(values, newCapacity);
}
private boolean equals(int[] a, int[] b, int bSize) {
if (a.length != bSize) {
return false;
}
for (int i = 0; i < bSize; i++) {
if (a[i] != b[i]) {
return false;
}
private boolean equals(int[] a, int size, int[] b) {
if (b.length != size) return false;
for (int i = 0; i < size; ++i) {
if (a[i] != b[i]) return false;
}
return true;
}
private boolean equalsWithExtra(int[] a, int[] b, int extra) {
if (a.length != b.length + 1) {
return false;
}
for (int i = 0; i < b.length; i++) {
if (a[i] != b[i]) {
return false;
}
}
return a[b.length] == extra;
}
private static int murmur(int[] data, int size) {
int m = 0x5bd1e995;
int h = 0x9747b28c ^ data.length;
int h = 0x9747b28c ^ size;
for (int i = 0; i < size; i++) {
int k = data[i];
@@ -167,29 +108,4 @@ public class StackStorage {
return h;
}
private static int murmurWithExtra(int[] data, int extra) {
int m = 0x5bd1e995;
int h = 0x9747b28c ^ (data.length + 1);
for (int k : data) {
k *= m;
k ^= k >>> 24;
k *= m;
h *= m;
h ^= k;
}
int k = extra * m;
k ^= k >>> 24;
k *= m;
h *= m;
h ^= k;
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
}

View File

@@ -36,7 +36,7 @@ public class SynonymTable {
nodeSynonyms[i] = synonymsCount + i;
}
for (int i = 0; i < synonymsCount; i++) {
nodeSynonyms[(int) (synonyms[i] & 0xFFFFFFFFL)] = i;
nodeSynonyms[(int) synonyms[i]] = i;
}
}
@@ -45,7 +45,7 @@ public class SynonymTable {
}
public int synonymAt(int synonymIndex) {
return (int) (synonyms[synonymIndex] & 0xFFFFFFFFL) + synonymsCount;
return (int) synonyms[synonymIndex] + synonymsCount;
}
public int nodeIdOrSynonym(int node) {

View File

@@ -12,6 +12,7 @@ import java.util.Arrays;
*/
public class Dictionary<T> {
private static final int INITIAL_CAPACITY = 16;
private static final long USED_BIT = 1L << 63;
private long[] keys;
private Object[] values;
@@ -36,10 +37,17 @@ public class Dictionary<T> {
return size;
}
public void put(long key, T value) {
if (key == 0) {
throw new IllegalArgumentException("Zero key not allowed");
/**
 * Converts a user key into the value stored in the keys array.
 * A stored value of 0 marks an unset slot, so USED_BIT is flipped in every key;
 * this lets callers use key 0. The only key that cannot be represented is
 * USED_BIT itself, since it would be stored as 0.
 *
 * @throws IllegalArgumentException if key equals USED_BIT
 */
private static long remapKey(long key) {
    long stored = key ^ USED_BIT;
    if (stored == 0) {
        throw new IllegalArgumentException("Key not allowed");
    }
    return stored;
}
public void put(long key, T value) {
key = remapKey(key);
int mask = keys.length - 1;
int i = hashCode(key) & mask;
@@ -60,6 +68,8 @@ public class Dictionary<T> {
@SuppressWarnings("unchecked")
public T get(long key) {
key = remapKey(key);
int mask = keys.length - 1;
int i = hashCode(key) & mask;
while (keys[i] != key && keys[i] != 0) {
@@ -72,7 +82,8 @@ public class Dictionary<T> {
public void forEach(Visitor<T> visitor) {
for (int i = 0; i < keys.length; i++) {
if (keys[i] != 0) {
visitor.visit(keys[i], (T) values[i]);
// Map key back, see remapKey
visitor.visit(keys[i] ^ USED_BIT, (T) values[i]);
}
}
}

View File

@@ -48,11 +48,13 @@ public class JfrReader implements Closeable {
public long chunkEndNanos;
public long chunkStartTicks;
public long ticksPerSec;
public double nanosPerTick;
public boolean stopAtNewChunk;
public final Dictionary<JfrClass> types = new Dictionary<>();
public final Map<String, JfrClass> typesByName = new HashMap<>();
public final Dictionary<String> threads = new Dictionary<>();
public final Dictionary<Long> javaThreads = new Dictionary<>();
public final Dictionary<ClassRef> classes = new Dictionary<>();
public final Dictionary<String> strings = new Dictionary<>();
public final Dictionary<byte[]> symbols = new Dictionary<>();
@@ -66,6 +68,7 @@ public class JfrReader implements Closeable {
private int executionSample;
private int nativeMethodSample;
private int wallClockSample;
private int methodTrace;
private int allocationInNewTLAB;
private int allocationOutsideTLAB;
private int allocationSample;
@@ -75,6 +78,9 @@ public class JfrReader implements Closeable {
private int activeSetting;
private int malloc;
private int free;
private int cpuTimeSample;
private int nativeLock;
private boolean hasWallTimeSpan;
public JfrReader(String fileName) throws IOException {
this.ch = FileChannel.open(Paths.get(fileName), StandardOpenOption.READ);
@@ -118,6 +124,10 @@ public class JfrReader implements Closeable {
return endNanos - startNanos;
}
/**
 * Returns the difference between chunkEndNanos and chunkStartNanos,
 * i.e. the duration in nanoseconds of the chunk those fields currently describe.
 */
public long chunkDurationNanos() {
    return chunkEndNanos - chunkStartNanos;
}
public <E extends Event> void registerEvent(String name, Class<E> eventClass) {
JfrClass type = typesByName.get(name);
if (type != null) {
@@ -158,6 +168,10 @@ public class JfrReader implements Closeable {
int size = getVarint();
int type = getVarint();
if (size <= 0) {
throw new IOException("Corrupted JFR recording: invalid event size");
}
if (type == 'L' && buf.getInt(pos) == CHUNK_SIGNATURE) {
if (state != STATE_NEW_CHUNK && stopAtNewChunk) {
buf.position(pos);
@@ -172,10 +186,14 @@ public class JfrReader implements Closeable {
if (cls == null || cls == ExecutionSample.class) return (E) readExecutionSample(false);
} else if (type == wallClockSample) {
if (cls == null || cls == ExecutionSample.class) return (E) readExecutionSample(true);
} else if (type == methodTrace) {
if (cls == null || cls == MethodTrace.class) return (E) readMethodTrace();
} else if (type == allocationInNewTLAB) {
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(true);
} else if (type == allocationOutsideTLAB || type == allocationSample) {
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(false);
} else if (type == cpuTimeSample) {
if (cls == null || cls == ExecutionSample.class) return (E) readCPUTimeSample();
} else if (type == malloc) {
if (cls == null || cls == MallocEvent.class) return (E) readMallocEvent(true);
} else if (type == free) {
@@ -186,17 +204,21 @@ public class JfrReader implements Closeable {
if (cls == null || cls == ContendedLock.class) return (E) readContendedLock(false);
} else if (type == threadPark) {
if (cls == null || cls == ContendedLock.class) return (E) readContendedLock(true);
} else if (type == nativeLock) {
if (cls == null || cls == NativeLockEvent.class) return (E) readNativeLockEvent();
} else if (type == activeSetting) {
readActiveSetting();
} else {
Constructor<? extends Event> customEvent = customEvents.get(type);
if (customEvent != null && (cls == null || cls == customEvent.getDeclaringClass())) {
long eventEnd = filePosition + pos + size;
ensureBytes(size - (buf.position() - pos));
try {
return (E) customEvent.newInstance(this);
} catch (ReflectiveOperationException e) {
throw new IllegalStateException(e);
} finally {
seek(filePosition + pos + size);
seek(eventEnd);
}
}
}
@@ -208,15 +230,25 @@ public class JfrReader implements Closeable {
return null;
}
private ExecutionSample readExecutionSample(boolean hasSamples) {
private ExecutionSample readExecutionSample(boolean wall) {
long time = getVarlong();
int tid = getVarint();
int stackTraceId = getVarint();
int threadState = getVarint();
int samples = hasSamples ? getVarint() : 1;
int samples = wall ? getVarint() : 1;
if (wall && hasWallTimeSpan) getVarlong(); // timeSpan is ignored
return new ExecutionSample(time, tid, stackTraceId, threadState, samples);
}
/**
 * Decodes a method trace event from the current buffer position.
 * Fields are read in their serialized order: start time, duration, thread ID,
 * stack trace ID, method ID. Note that the MethodTrace constructor takes them
 * in a different order.
 */
private MethodTrace readMethodTrace() {
    long startTime = getVarlong();
    long duration = getVarlong();
    int tid = getVarint();
    int stackTraceId = getVarint();
    int method = getVarint();
    return new MethodTrace(startTime, tid, stackTraceId, method, duration);
}
private AllocationSample readAllocationSample(boolean tlab) {
long time = getVarlong();
int tid = getVarint();
@@ -227,6 +259,25 @@ public class JfrReader implements Closeable {
return new AllocationSample(time, tid, stackTraceId, classId, allocationSize, tlabSize);
}
/**
 * Decodes a CPU time sample event from the current buffer position and converts it
 * to an ExecutionSample tagged with the synthetic ExecutionSample.CPU_TIME_SAMPLE
 * thread state and a sample count of 1.
 *
 * Unlike the other sample readers here, the stack trace ID precedes the thread ID.
 */
private ExecutionSample readCPUTimeSample() {
    long time = getVarlong();
    int stackTraceId = getVarint();
    int tid = getVarint();
    // The trailing fields are read in order and their values discarded
    getBoolean();   // failed
    getVarlong();   // samplingPeriod
    getBoolean();   // biased
    return new ExecutionSample(time, tid, stackTraceId, ExecutionSample.CPU_TIME_SAMPLE, 1);
}
/**
 * Decodes a native lock event from the current buffer position.
 * Fields are read in their serialized order: time, duration, thread ID,
 * stack trace ID, address.
 */
private NativeLockEvent readNativeLockEvent() {
    long time = getVarlong();
    long duration = getVarlong();
    int tid = getVarint();
    int stackTraceId = getVarint();
    long address = getVarlong();
    return new NativeLockEvent(time, tid, stackTraceId, address, duration);
}
private MallocEvent readMallocEvent(boolean hasSize) {
long time = getVarlong();
int tid = getVarint();
@@ -302,6 +353,7 @@ public class JfrReader implements Closeable {
startNanos = Math.min(startNanos, chunkStartNanos);
endNanos = Math.max(endNanos, chunkEndNanos);
startTicks = Math.min(startTicks, chunkStartTicks);
nanosPerTick = 1e9 / ticksPerSec;
types.clear();
typesByName.clear();
@@ -422,7 +474,9 @@ public class JfrReader implements Closeable {
}
private void readThreads(int fieldCount) {
int count = threads.preallocate(getVarint());
int count = getVarint();
threads.preallocate(count);
javaThreads.preallocate(count);
for (int i = 0; i < count; i++) {
long id = getVarlong();
String osName = getString();
@@ -431,6 +485,7 @@ public class JfrReader implements Closeable {
long javaThreadId = getVarlong();
readFields(fieldCount - 4);
threads.put(id, javaName != null ? javaName : osName);
javaThreads.put(id, javaThreadId);
}
}
@@ -548,6 +603,7 @@ public class JfrReader implements Closeable {
executionSample = getTypeId("jdk.ExecutionSample");
nativeMethodSample = getTypeId("jdk.NativeMethodSample");
wallClockSample = getTypeId("profiler.WallClockSample");
methodTrace = getTypeId("jdk.MethodTrace");
allocationInNewTLAB = getTypeId("jdk.ObjectAllocationInNewTLAB");
allocationOutsideTLAB = getTypeId("jdk.ObjectAllocationOutsideTLAB");
allocationSample = getTypeId("jdk.ObjectAllocationSample");
@@ -557,11 +613,17 @@ public class JfrReader implements Closeable {
activeSetting = getTypeId("jdk.ActiveSetting");
malloc = getTypeId("profiler.Malloc");
free = getTypeId("profiler.Free");
cpuTimeSample = getTypeId("jdk.CPUTimeSample");
nativeLock = getTypeId("profiler.NativeLock");
registerEvent("jdk.CPULoad", CPULoad.class);
registerEvent("jdk.GCHeapSummary", GCHeapSummary.class);
registerEvent("jdk.ObjectCount", ObjectCount.class);
registerEvent("jdk.ObjectCountAfterGC", ObjectCount.class);
registerEvent("profiler.ProcessSample", ProcessSample.class);
JfrClass wallClass = typesByName.get("profiler.WallClockSample");
hasWallTimeSpan = wallClass != null && wallClass.field("timeSpan") != null;
}
private int getTypeId(String typeName) {
@@ -616,6 +678,14 @@ public class JfrReader implements Closeable {
return buf.getDouble();
}
// Reads a single raw byte from the buffer, advancing its position by one
public byte getByte() {
    return buf.get();
}
// Reads a single byte from the buffer and interprets any non-zero value as true
public boolean getBoolean() {
    return buf.get() != 0;
}
public String getString() {
switch (buf.get()) {
case 0:
@@ -657,6 +727,12 @@ public class JfrReader implements Closeable {
}
}
/**
 * Repositions the reader at offset 0 and puts it into the STATE_NEW_CHUNK state,
 * then requests that CHUNK_HEADER_SIZE bytes be buffered.
 * The result of ensureBytes is not checked here.
 *
 * @throws IOException if seeking or buffering fails
 */
public void rewind() throws IOException {
    seek(0);
    state = STATE_NEW_CHUNK;
    ensureBytes(CHUNK_HEADER_SIZE);
}
private boolean ensureBytes(int needed) throws IOException {
if (buf.remaining() >= needed) {
return true;
@@ -682,4 +758,8 @@ public class JfrReader implements Closeable {
buf.flip();
return buf.limit() > 0;
}
/**
 * Converts an event timestamp expressed in ticks into nanoseconds,
 * using the start time, start ticks and tick rate of the current chunk.
 *
 * @param time event time in ticks
 * @return chunkStartNanos plus the tick offset from chunkStartTicks scaled by nanosPerTick
 */
public long eventTimeToNanos(long time) {
    long ticksSinceChunkStart = time - chunkStartTicks;
    long nanosSinceChunkStart = (long) (ticksSinceChunkStart * nanosPerTick);
    return chunkStartNanos + nanosSinceChunkStart;
}
}

View File

@@ -6,6 +6,10 @@
package one.jfr.event;
public class ExecutionSample extends Event {
// Synthetic thread state to distinguish samples converted from jdk.CPUTimeSample event.
// A small constant suitable for BitSet, does not clash with any existing thread state.
public static final int CPU_TIME_SAMPLE = 254;
public final int threadState;
public final int samples;

View File

@@ -12,17 +12,26 @@ import java.util.HashMap;
public class MallocLeakAggregator implements EventCollector {
private final EventCollector wrapped;
private final double tail;
private final Map<Long, MallocEvent> addresses;
private List<MallocEvent> events;
private long minTime = Long.MAX_VALUE;
private long maxTime = Long.MIN_VALUE;
public MallocLeakAggregator(EventCollector wrapped) {
public MallocLeakAggregator(EventCollector wrapped, double tail) {
if (tail < 0.0 || tail > 1.0) {
throw new IllegalArgumentException("tail must be between 0 and 1");
}
this.wrapped = wrapped;
this.tail = tail;
this.addresses = new HashMap<>();
}
@Override
public void collect(Event e) {
events.add((MallocEvent) e);
minTime = Math.min(minTime, e.time);
maxTime = Math.max(maxTime, e.time);
}
@Override
@@ -47,9 +56,15 @@ public class MallocLeakAggregator implements EventCollector {
@Override
public boolean finish() {
// Ignore tail allocations made in the last N% of profiling session:
// they are too young to be considered a leak
long timeCutoff = (long) (minTime * tail + maxTime * (1.0 - tail));
wrapped.beforeChunk();
for (Event e : addresses.values()) {
wrapped.collect(e);
if (e.time <= timeCutoff) {
wrapped.collect(e);
}
}
wrapped.afterChunk();

View File

@@ -0,0 +1,36 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
/**
 * Event describing one traced method invocation.
 * The event value is the invocation duration; events are grouped by traced method.
 */
public class MethodTrace extends Event {
    /** ID of the traced method. */
    public final int method;
    /** Duration of the invocation, reported as the event value. */
    public final long duration;

    public MethodTrace(long time, int tid, int stackTraceId, int method, long duration) {
        super(time, tid, stackTraceId);
        this.duration = duration;
        this.method = method;
    }

    @Override
    public int hashCode() {
        return 127 * method + stackTraceId;
    }

    /** Two method traces belong to the same group when they refer to the same method. */
    @Override
    public boolean sameGroup(Event o) {
        return o instanceof MethodTrace && ((MethodTrace) o).method == method;
    }

    @Override
    public long value() {
        return duration;
    }
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
/**
 * Event carrying an address and a duration for a native lock.
 * The event value is the duration.
 */
public class NativeLockEvent extends Event {
    // Address field of the event - presumably the address of the lock; confirm against the writer
    public final long address;
    // Duration field of the event; returned by value()
    public final long duration;

    public NativeLockEvent(long time, int tid, int stackTraceId, long address, long duration) {
        super(time, tid, stackTraceId);
        this.address = address;
        this.duration = duration;
    }

    @Override
    public long value() {
        return duration;
    }
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
import one.jfr.JfrReader;
/**
 * Snapshot of per-process statistics decoded directly from a JfrReader.
 * The event has no thread or stack trace attached: both are passed to Event as 0.
 */
public class ProcessSample extends Event {
    public final int pid;
    public final int ppid;
    public final String name;
    public final String cmdLine;
    public final int uid;
    public final byte state;
    public final long processStartTime;
    public final float cpuUser;
    public final float cpuSystem;
    public final float cpuPercent;
    public final int threads;
    public final long vmSize;
    public final long vmRss;
    public final long rssAnon;
    public final long rssFiles;
    public final long rssShmem;
    public final long minorFaults;
    public final long majorFaults;
    public final long ioRead;
    public final long ioWrite;

    /**
     * Reads all fields from the reader's current position.
     * The order of the reads below defines the expected serialized layout
     * and must not be changed independently of the writer.
     */
    public ProcessSample(JfrReader jfr) {
        // The first varlong is the event time
        super(jfr.getVarlong(), 0, 0);
        this.pid = jfr.getVarint();
        this.ppid = jfr.getVarint();
        this.name = jfr.getString();
        this.cmdLine = jfr.getString();
        this.uid = jfr.getVarint();
        this.state = jfr.getByte();
        this.processStartTime = jfr.getVarlong();
        this.cpuUser = jfr.getFloat();
        this.cpuSystem = jfr.getFloat();
        this.cpuPercent = jfr.getFloat();
        this.threads = jfr.getVarint();
        this.vmSize = jfr.getVarlong();
        this.vmRss = jfr.getVarlong();
        this.rssAnon = jfr.getVarlong();
        this.rssFiles = jfr.getVarlong();
        this.rssShmem = jfr.getVarlong();
        this.minorFaults = jfr.getVarlong();
        this.majorFaults = jfr.getVarlong();
        this.ioRead = jfr.getVarlong();
        this.ioWrite = jfr.getVarlong();
    }
}

View File

@@ -44,6 +44,12 @@ public class Proto {
return this;
}
/**
 * Appends a field with the given index whose payload is written by writeFixed64.
 * The tag is emitted with wire type 1, the same type the double overload of field() uses.
 *
 * @return this, to allow call chaining
 */
public Proto fieldFixed64(int index, long n) {
    tag(index, 1);
    writeFixed64(n);
    return this;
}
public Proto field(int index, double d) {
tag(index, 1);
writeDouble(d);
@@ -69,21 +75,30 @@ public class Proto {
return this;
}
public int startField(int index) {
// 32 bits for the start position
// 32 bits for the max length byte count
public long startField(int index, int maxLenByteCount) {
tag(index, 2);
ensureCapacity(3);
return pos += 3;
ensureCapacity(maxLenByteCount);
pos += maxLenByteCount;
return ((long) pos << 32) | maxLenByteCount;
}
public void commitField(int mark) {
int length = pos - mark;
if (length >= 1 << (7 * 3)) {
public void commitField(long mark) {
int messageStart = (int) (mark >> 32);
int maxLenByteCount = (int) mark;
int actualLength = pos - messageStart;
if (actualLength >= 1L << (7 * maxLenByteCount)) {
throw new IllegalArgumentException("Field too large");
}
buf[mark - 3] = (byte) (0x80 | (length & 0x7f));
buf[mark - 2] = (byte) (0x80 | ((length >>> 7) & 0x7f));
buf[mark - 1] = (byte) (length >>> 14);
int lenBytesStart = messageStart - maxLenByteCount;
for (int i = 0; i < maxLenByteCount - 1; ++i) {
buf[lenBytesStart + i] = (byte) (0x80 | actualLength);
actualLength >>>= 7;
}
buf[lenBytesStart + maxLenByteCount - 1] = (byte) actualLength;
}
public void writeInt(int n) {
@@ -109,8 +124,11 @@ public class Proto {
}
/**
 * Writes a double by converting it to its raw 64-bit pattern
 * ({@code Double.doubleToRawLongBits}) and delegating to {@code writeFixed64}.
 *
 * @param d value to write
 */
public void writeDouble(double d) {
writeFixed64(Double.doubleToRawLongBits(d));
}
public void writeFixed64(long n) {
ensureCapacity(8);
long n = Double.doubleToRawLongBits(d);
buf[pos] = (byte) n;
buf[pos + 1] = (byte) (n >>> 8);
buf[pos + 2] = (byte) (n >>> 16);

View File

@@ -42,14 +42,14 @@ static int pthread_setspecific_hook(pthread_key_t key, const void* value) {
}
void CpuEngine::onThreadStart() {
CpuEngine* current = __atomic_load_n(&_current, __ATOMIC_ACQUIRE);
CpuEngine* current = loadAcquire(_current);
if (current != NULL) {
current->createForThread(OS::threadId());
}
}
void CpuEngine::onThreadEnd() {
CpuEngine* current = __atomic_load_n(&_current, __ATOMIC_ACQUIRE);
CpuEngine* current = loadAcquire(_current);
if (current != NULL) {
current->destroyForThread(OS::threadId());
}
@@ -80,12 +80,12 @@ bool CpuEngine::setupThreadHook() {
void CpuEngine::enableThreadHook() {
*_pthread_entry = (void*)pthread_setspecific_hook;
__atomic_store_n(&_current, this, __ATOMIC_RELEASE);
storeRelease(_current, this);
}
void CpuEngine::disableThreadHook() {
*_pthread_entry = (void*)pthread_setspecific;
__atomic_store_n(&_current, NULL, __ATOMIC_RELEASE);
storeRelease(_current, nullptr);
}
bool CpuEngine::isResourceLimit(int err) {
@@ -124,9 +124,8 @@ void CpuEngine::signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext) {
if (!_enabled) return;
J9StackTraceNotification notif;
StackContext java_ctx;
notif.num_frames = _cstack == CSTACK_NO ? 0 : _cstack == CSTACK_DWARF
? StackWalker::walkDwarf(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &java_ctx)
: StackWalker::walkFP(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &java_ctx);
? StackWalker::walkDwarf(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES)
: StackWalker::walkFP(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES);
J9StackTraces::checkpoint(_interval, &notif);
}

View File

@@ -23,7 +23,6 @@ class CTimer : public CpuEngine {
return "ctimer";
}
Error check(Arguments& args);
Error start(Arguments& args);
void stop();
@@ -36,10 +35,6 @@ class CTimer : public CpuEngine {
class CTimer : public CpuEngine {
public:
// Stub: ignores `args` and always returns the "not supported" error.
Error check(Arguments& args) {
return Error("CTimer is not supported on this platform");
}
// Stub: ignores `args` and always returns the "not supported" error.
Error start(Arguments& args) {
return Error("CTimer is not supported on this platform");
}

View File

@@ -38,7 +38,7 @@ int CTimer::createForThread(int tid) {
sev.sigev_value.sival_ptr = NULL;
sev.sigev_signo = _signal;
sev.sigev_notify = SIGEV_THREAD_ID;
((int*)&sev.sigev_notify)[1] = tid;
(&sev.sigev_notify)[1] = tid;
// Use raw syscalls, since libc wrapper allows only predefined clocks
clockid_t clock = thread_cpu_clock(tid);
@@ -73,20 +73,6 @@ void CTimer::destroyForThread(int tid) {
}
}
// Checks that this engine can run: the pthread hook must be installable and
// a timer on CLOCK_THREAD_CPUTIME_ID must be creatable. The timer created
// here is only a probe and is deleted immediately.
Error CTimer::check(Arguments& args) {
    if (!setupThreadHook()) {
        return Error("Could not set pthread hook");
    }

    timer_t probe;
    const int rc = timer_create(CLOCK_THREAD_CPUTIME_ID, NULL, &probe);
    if (rc < 0) {
        return Error("Failed to create CPU timer");
    }

    timer_delete(probe);
    return Error::OK;
}
Error CTimer::start(Arguments& args) {
if (!setupThreadHook()) {
return Error("Could not set pthread hook");

View File

@@ -55,7 +55,7 @@ enum {
};
FrameDesc FrameDesc::empty_frame = {0, DW_REG_SP | EMPTY_FRAME_SIZE << 8, DW_SAME_FP, -EMPTY_FRAME_SIZE};
FrameDesc FrameDesc::empty_frame = {0, DW_REG_SP | EMPTY_FRAME_SIZE << 8, DW_SAME_FP, INITIAL_PC_OFFSET};
FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | LINKED_FRAME_SIZE << 8, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT};
@@ -136,12 +136,12 @@ void DwarfParser::parseInstructions(u32 loc, const char* end) {
u32 cfa_reg = DW_REG_SP;
int cfa_off = EMPTY_FRAME_SIZE;
int fp_off = DW_SAME_FP;
int pc_off = -EMPTY_FRAME_SIZE;
int pc_off = INITIAL_PC_OFFSET;
u32 rem_cfa_reg = DW_REG_SP;
int rem_cfa_off = EMPTY_FRAME_SIZE;
int rem_fp_off = DW_SAME_FP;
int rem_pc_off = -EMPTY_FRAME_SIZE;
int rem_pc_off = INITIAL_PC_OFFSET;
while (_ptr < end) {
u8 op = get8();
@@ -270,6 +270,8 @@ void DwarfParser::parseInstructions(u32 loc, const char* end) {
case DW_CFA_restore:
if ((op & 0x3f) == DW_REG_FP) {
fp_off = DW_SAME_FP;
} else if ((op & 0x3f) == DW_REG_PC) {
pc_off = INITIAL_PC_OFFSET;
}
break;
}

View File

@@ -15,6 +15,7 @@ const int DW_REG_INVALID = 255; // denotes unsupported configuration
const int DW_PC_OFFSET = 1;
const int DW_SAME_FP = 0x80000000;
const int DW_LINK_REGISTER = 0x80000000;
const int DW_STACK_SLOT = sizeof(void*);
@@ -27,6 +28,7 @@ const int DW_REG_SP = 7;
const int DW_REG_PC = 16;
const int EMPTY_FRAME_SIZE = DW_STACK_SLOT;
const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT;
const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE;
#elif defined(__i386__)
@@ -37,6 +39,7 @@ const int DW_REG_SP = 4;
const int DW_REG_PC = 8;
const int EMPTY_FRAME_SIZE = DW_STACK_SLOT;
const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT;
const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE;
#elif defined(__aarch64__)
@@ -47,6 +50,7 @@ const int DW_REG_SP = 31;
const int DW_REG_PC = 30;
const int EMPTY_FRAME_SIZE = 0;
const int LINKED_FRAME_SIZE = 0;
const int INITIAL_PC_OFFSET = DW_LINK_REGISTER;
#else
@@ -57,6 +61,7 @@ const int DW_REG_SP = 1;
const int DW_REG_PC = 2;
const int EMPTY_FRAME_SIZE = 0;
const int LINKED_FRAME_SIZE = 0;
const int INITIAL_PC_OFFSET = DW_LINK_REGISTER;
#endif

View File

@@ -8,10 +8,6 @@
volatile bool Engine::_enabled = false;
// Base implementation: performs no checks and reports success.
Error Engine::check(Arguments& args) {
return Error::OK;
}
// Base implementation: does nothing and reports success.
Error Engine::start(Arguments& args) {
return Error::OK;
}

View File

@@ -46,7 +46,6 @@ class Engine {
return "total";
}
virtual Error check(Arguments& args);
virtual Error start(Arguments& args);
virtual void stop();

View File

@@ -7,6 +7,7 @@
#define _EVENT_H
#include <stdint.h>
#include "asprof.h"
#include "os.h"
@@ -15,14 +16,17 @@ enum EventType {
PERF_SAMPLE,
EXECUTION_SAMPLE,
WALL_CLOCK_SAMPLE,
INSTRUMENTED_METHOD,
NATIVE_LOCK_SAMPLE,
MALLOC_SAMPLE,
INSTRUMENTED_METHOD,
METHOD_TRACE,
ALLOC_SAMPLE,
ALLOC_OUTSIDE_TLAB,
LIVE_OBJECT,
LOCK_SAMPLE,
PARK_SAMPLE,
PROFILING_WINDOW,
USER_EVENT,
};
class Event {
@@ -41,9 +45,18 @@ class ExecutionEvent : public Event {
ExecutionEvent(u64 start_time) : _start_time(start_time), _thread_state(THREAD_UNKNOWN) {}
};
// Event payload consisting of a start time and a duration.
// The time unit is not specified in this header.
class MethodTraceEvent : public Event {
public:
u64 _start_time;
u64 _duration;
// Stores both values as given; no validation is performed
MethodTraceEvent(u64 start_time, u64 duration) : _start_time(start_time), _duration(duration) {}
};
// Event payload for wall clock samples: start time, time span, thread state
// and a sample count. No constructor is declared, so the fields are not
// initialized by this class itself.
class WallClockEvent : public Event {
public:
u64 _start_time;
u64 _time_span;
ThreadState _thread_state;
u32 _samples;
};
@@ -63,6 +76,13 @@ class LockEvent : public EventWithClassId {
long long _timeout;
};
// Event payload for native lock samples: a start/end time pair and an
// address (presumably that of the lock - confirm against the producer).
class NativeLockEvent : public Event {
public:
u64 _start_time;
u64 _end_time;
uintptr_t _address;
};
class LiveObject : public EventWithClassId {
public:
u64 _start_time;
@@ -83,4 +103,12 @@ class MallocEvent : public Event {
u64 _size;
};
// Event payload for user-defined events: a start time, an event type key and
// a byte buffer described by pointer + length. The pointer is const and this
// class declares no destructor, so it does not free the buffer.
class UserEvent : public Event {
public:
u64 _start_time;
asprof_jfr_event_key _type;
const uint8_t* _data;
size_t _len;
};
#endif // _EVENT_H

View File

@@ -17,6 +17,8 @@
#define RESTARTABLE(call) ({ ssize_t ret; while ((ret = call) < 0 && errno == EINTR); ret; })
#define MAX_PROBE_LEN 256
// base header for all requests
enum request_type {
@@ -34,6 +36,7 @@ struct perf_fd_request {
int tid;
int target_cpu;
struct perf_event_attr attr;
char probe_name[MAX_PROBE_LEN];
};
struct fd_response {

View File

@@ -26,7 +26,7 @@ class FdTransferClient {
}
}
static int requestPerfFd(int *tid, int target_cpu, struct perf_event_attr *attr);
static int requestPerfFd(int* tid, int target_cpu, struct perf_event_attr* attr, const char* probe_name);
static int requestKallsymsFd();
};

View File

@@ -47,14 +47,16 @@ bool FdTransferClient::connectToServer(const char *path) {
return true;
}
int FdTransferClient::requestPerfFd(int *tid, int target_cpu, struct perf_event_attr *attr) {
int FdTransferClient::requestPerfFd(int* tid, int target_cpu, struct perf_event_attr* attr, const char* probe_name) {
struct perf_fd_request request;
request.header.type = PERF_FD;
request.tid = *tid;
request.target_cpu = target_cpu;
memcpy(&request.attr, attr, sizeof(request.attr));
*stpncpy(request.probe_name, probe_name, sizeof(request.probe_name) - 1) = 0;
if (RESTARTABLE(send(_peer, &request, sizeof(request), 0)) != sizeof(request)) {
size_t request_size = sizeof(request) - sizeof(request.probe_name) + strlen(request.probe_name) + 1;
if (RESTARTABLE(send(_peer, &request, request_size, 0)) != request_size) {
Log::warn("FdTransferClient send(): %s", strerror(errno));
return -1;
}

View File

@@ -254,7 +254,7 @@ void FlameGraph::printTreeFrame(Writer& out, const Trie& f, int level, const cha
}
out << _buf;
if (trie->_children.size() > 0) {
if (!trie->_children.empty()) {
out << "<ul>\n";
if (trie->_total >= _mintotal) {
printTreeFrame(out, *trie, level + 1, names);

View File

@@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <assert.h>
#include <map>
#include <string>
#include <arpa/inet.h>
@@ -14,18 +15,19 @@
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
#include "demangle.h"
#include "flightRecorder.h"
#include "incbin.h"
#include "jfrMetadata.h"
#include "dictionary.h"
#include "lookup.h"
#include "os.h"
#include "processSampler.h"
#include "profiler.h"
#include "spinLock.h"
#include "symbols.h"
#include "threadFilter.h"
#include "threadLocalData.h"
#include "tsc.h"
#include "userEvents.h"
#include "vmStructs.h"
@@ -56,8 +58,9 @@ static jclass _jfr_sync_class = NULL;
static jmethodID _start_method;
static jmethodID _stop_method;
static jmethodID _box_method;
static bool _jfr_starting = false;
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr", "vm"};
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "vm"};
struct CpuTime {
@@ -71,226 +74,6 @@ struct CpuTimes {
CpuTime total;
};
class MethodInfo {
public:
MethodInfo() : _mark(false), _key(0) {
}
bool _mark;
u32 _key;
u32 _class;
u32 _name;
u32 _sig;
jint _modifiers;
jint _line_number_table_size;
jvmtiLineNumberEntry* _line_number_table;
FrameTypeId _type;
jint getLineNumber(jint bci) {
if (_line_number_table_size == 0) {
return 0;
}
int i = 1;
while (i < _line_number_table_size && bci >= _line_number_table[i].start_location) {
i++;
}
return _line_number_table[i - 1].line_number;
}
};
class MethodMap : public std::map<jmethodID, MethodInfo> {
public:
MethodMap() {
}
~MethodMap() {
jvmtiEnv* jvmti = VM::jvmti();
for (const_iterator it = begin(); it != end(); ++it) {
jvmtiLineNumberEntry* line_number_table = it->second._line_number_table;
if (line_number_table != NULL) {
jvmti->Deallocate((unsigned char*)line_number_table);
}
}
}
size_t usedMemory() {
size_t bytes = 0;
for (const_iterator it = begin(); it != end(); ++it) {
bytes += sizeof(jmethodID) + sizeof(MethodInfo);
bytes += it->second._line_number_table_size * sizeof(jvmtiLineNumberEntry);
}
return bytes;
}
};
// Resolves call frames into MethodInfo records stored in a MethodMap, and
// interns class, package and symbol names in dictionaries along the way.
class Lookup {
public:
MethodMap* _method_map;
Dictionary* _classes;
Dictionary _packages;
Dictionary _symbols;
private:
JNIEnv* _jni;
// Fills `mi` for a non-Java frame.
//   name     - symbol name; demangled when Demangle says it needs it
//   lib_name - becomes the "class"; NULL maps to the empty class name
// Sets the frame type to FRAME_CPP, FRAME_KERNEL or FRAME_NATIVE.
void fillNativeMethodInfo(MethodInfo* mi, const char* name, const char* lib_name) {
if (lib_name == NULL) {
mi->_class = _classes->lookup("");
} else if (lib_name[0] == '[' && lib_name[1] != 0) {
// Name starting with '[': look it up without its first and last character
mi->_class = _classes->lookup(lib_name + 1, strlen(lib_name) - 2);
} else {
mi->_class = _classes->lookup(lib_name);
}
// NOTE(review): 0x100 looks like the JVM ACC_NATIVE access flag - confirm
mi->_modifiers = 0x100;
mi->_line_number_table_size = 0;
mi->_line_number_table = NULL;
if (Demangle::needsDemangling(name)) {
char* demangled = Demangle::demangle(name, false);
if (demangled != NULL) {
mi->_name = _symbols.lookup(demangled);
mi->_sig = _symbols.lookup("()L;");
mi->_type = FRAME_CPP;
free(demangled);
return;
}
}
// Either no demangling was needed or it failed: use the raw name
size_t len = strlen(name);
if (len >= 4 && strcmp(name + len - 4, "_[k]") == 0) {
// The "_[k]" suffix marks the frame as FRAME_KERNEL and is not stored in the name
mi->_name = _symbols.lookup(name, len - 4);
mi->_sig = _symbols.lookup("(Lk;)L;");
mi->_type = FRAME_KERNEL;
} else {
mi->_name = _symbols.lookup(name);
mi->_sig = _symbols.lookup("()L;");
mi->_type = FRAME_NATIVE;
}
}
// Fills `mi` for a Java method using JVMTI.
// Returns false (leaving `mi` untouched) when the VMMethod check below
// rejects the jmethodID; returns true otherwise, even if the JVMTI name
// queries fail (a "jvmtiError" placeholder is recorded in that case).
// Modifiers and the line number table are queried only when first_time is true.
bool fillJavaMethodInfo(MethodInfo* mi, jmethodID method, bool first_time) {
if (VMStructs::hasMethodStructs()) {
// Workaround for JDK-8313816
VMMethod* vm_method = VMMethod::fromMethodID(method);
if (vm_method == NULL || vm_method->id() == NULL) {
return false;
}
}
jvmtiEnv* jvmti = VM::jvmti();
jclass method_class = NULL;
char* class_name = NULL;
char* method_name = NULL;
char* method_sig = NULL;
if (jvmti->GetMethodName(method, &method_name, &method_sig, NULL) == 0 &&
jvmti->GetMethodDeclaringClass(method, &method_class) == 0 &&
jvmti->GetClassSignature(method_class, &class_name, NULL) == 0) {
// Drop the first and last character of the class signature
// (presumably the 'L' prefix and ';' suffix - confirm)
mi->_class = _classes->lookup(class_name + 1, strlen(class_name) - 2);
mi->_name = _symbols.lookup(method_name);
mi->_sig = _symbols.lookup(method_sig);
} else {
mi->_class = _classes->lookup("");
mi->_name = _symbols.lookup("jvmtiError");
mi->_sig = _symbols.lookup("()L;");
}
if (method_class) {
_jni->DeleteLocalRef(method_class);
}
// Pointers that were never filled in are still NULL here
jvmti->Deallocate((unsigned char*)method_sig);
jvmti->Deallocate((unsigned char*)method_name);
jvmti->Deallocate((unsigned char*)class_name);
if (first_time && jvmti->GetMethodModifiers(method, &mi->_modifiers) != 0) {
mi->_modifiers = 0;
}
if (first_time && jvmti->GetLineNumberTable(method, &mi->_line_number_table_size, &mi->_line_number_table) != 0) {
mi->_line_number_table_size = 0;
mi->_line_number_table = NULL;
}
mi->_type = FRAME_INTERPRETED;
return true;
}
// Fills `mi` for a frame that denotes a class rather than a method:
// empty method name, no line numbers, frame type FRAME_INLINED.
void fillJavaClassInfo(MethodInfo* mi, u32 class_id) {
mi->_class = class_id;
mi->_name = _symbols.lookup("");
mi->_sig = _symbols.lookup("()L;");
mi->_modifiers = 0;
mi->_line_number_table_size = 0;
mi->_line_number_table = NULL;
mi->_type = FRAME_INLINED;
}
public:
// Captures the JNI environment returned by VM::jni() at construction time.
Lookup(MethodMap* method_map, Dictionary* classes) :
_method_map(method_map), _classes(classes), _packages(), _symbols(), _jni(VM::jni()) {
}
// Returns the MethodInfo for `frame`, creating the map entry if needed.
// A new entry gets the map size (after insertion) as its key. The entry
// is (re)filled only while its _mark flag is false.
MethodInfo* resolveMethod(ASGCT_CallFrame& frame) {
jmethodID method = frame.method_id;
// operator[] inserts a default-constructed MethodInfo when the key is absent
MethodInfo* mi = &(*_method_map)[method];
bool first_time = mi->_key == 0;
if (first_time) {
mi->_key = _method_map->size();
}
if (!mi->_mark) {
mi->_mark = true;
// frame.bci selects how method_id is interpreted
if (method == NULL) {
fillNativeMethodInfo(mi, "unknown", NULL);
} else if (frame.bci > BCI_NATIVE_FRAME) {
if (!fillJavaMethodInfo(mi, method, first_time)) {
fillNativeMethodInfo(mi, "stale_jmethodID", NULL);
}
} else if (frame.bci == BCI_NATIVE_FRAME) {
// method_id carries a pointer to the native symbol name
const char* name = (const char*)method;
fillNativeMethodInfo(mi, name, Profiler::instance()->getLibraryName(name));
} else if (frame.bci == BCI_ADDRESS) {
// method_id is formatted with %p and used as the name
char buf[32];
snprintf(buf, sizeof(buf), "%p", method);
fillNativeMethodInfo(mi, buf, NULL);
} else if (frame.bci == BCI_ERROR) {
// method_id carries a pointer to a string used as the name
fillNativeMethodInfo(mi, (const char*)method, NULL);
} else {
// Any other bci: method_id is used as a class id
fillJavaClassInfo(mi, (uintptr_t)method);
}
}
return mi;
}
// Returns the dictionary id of the package part of `class_name`
// (everything before the last '/'), or 0 when there is no '/'.
u32 getPackage(const char* class_name) {
const char* package = strrchr(class_name, '/');
if (package == NULL) {
return 0;
}
if (package[1] >= '0' && package[1] <= '9') {
// Seems like a hidden or anonymous class, e.g. com/example/Foo/0x012345
// Step back to the previous '/'; give up with 0 if the start is reached first
do {
if (package == class_name) return 0;
} while (*--package != '/');
}
if (class_name[0] == '[') {
// Name starting with '[': the package text begins right after the first 'L'
// NOTE(review): the strchr result is not checked for NULL
class_name = strchr(class_name, 'L') + 1;
}
return _packages.lookup(class_name, package - class_name);
}
// Returns the dictionary id of `name` in the symbol dictionary.
u32 getSymbol(const char* name) {
return _symbols.lookup(name);
}
};
class Buffer {
private:
int _offset;
@@ -391,6 +174,21 @@ class Buffer {
put(v, len);
}
// Writes `len` bytes from `v` as a string: one encoding byte, then the
// length as a variable-length 32-bit value, then the raw bytes.
void putByteString(const char* v, u32 len) {
put8(5); // STRING_ENCODING_LATIN1_BYTE_ARRAY
putVar32(len);
put(v, len);
}
void putByteString(const char* v) {
if (v == NULL) {
put8(0);
} else {
size_t len = strlen(v);
putByteString(v, len < MAX_STRING_LENGTH ? len : MAX_STRING_LENGTH);
}
}
// Stores `v` at the given index of the data array. Only that one byte is
// written; no other state is modified here.
void put8(int offset, char v) {
_data[offset] = v;
}
@@ -457,6 +255,8 @@ class Recording {
u32 _last_gc_id;
CpuTimes _last_times;
SmallBuffer _monitor_buf;
RecordingBuffer _proc_buf;
ProcessSampler _process_sampler;
static float ratio(float value) {
return value < 0 ? 0 : value > 1 ? 1 : value;
@@ -509,6 +309,10 @@ class Recording {
_heap_monitor_enabled = !args.hasOption(NO_HEAP_SUMMARY) && VM::_totalMemory != NULL && VM::_freeMemory != NULL;
_last_gc_id = 0;
if (args._proc > 0) {
_process_sampler.enable(args._proc * 1000000);
}
}
~Recording() {
@@ -528,6 +332,7 @@ class Recording {
off_t finishChunk() {
flush(&_monitor_buf);
flush(&_proc_buf);
writeNativeLibraries(_buf);
@@ -646,6 +451,26 @@ class Recording {
_last_gc_id = gc_id;
}
void processMonitorCycle(const u64 wall_time) {
if (!_process_sampler.shouldSample(wall_time)) return;
const u64 deadline_ns = OS::nanotime() + MAX_TIME_NS;
const int process_count = _process_sampler.sample(wall_time);
for (int pid_index = 0; pid_index < process_count; pid_index++) {
const u64 current_time = OS::nanotime();
if (current_time > deadline_ns) {
Log::debug("Sampled %d of %d processes due to time limit", pid_index, process_count);
break;
}
ProcessInfo info;
if (_process_sampler.getProcessInfo(pid_index, current_time, info)) {
flushIfNeeded(&_proc_buf, RECORDING_BUFFER_LIMIT - MAX_PROCESS_SAMPLE_JFR_EVENT_LENGTH);
recordProcessSample(&_proc_buf, &info);
}
}
}
// True when a master recording file has been set (pointer is non-NULL).
bool hasMasterRecording() const {
return _master_recording_file != NULL;
}
@@ -714,26 +539,22 @@ class Recording {
return true;
}
const char* getFeaturesString(char* str, size_t size, StackWalkFeatures f) {
snprintf(str, size, "%s %s %s %s %s %s %s %s %s %s %s",
f.unknown_java ? "unknown_java" : "-",
f.unwind_stub ? "unwind_stub" : "-",
f.unwind_comp ? "unwind_comp" : "-",
f.unwind_native ? "unwind_native" : "-",
f.java_anchor ? "java_anchor" : "-",
f.gc_traces ? "gc_traces" : "-",
f.stats ? "stats" : "-",
f.probe_sp ? "probesp" : "-",
f.vtable_target ? "vtable" : "-",
f.comp_task ? "comptask" : "-",
f.pc_addr ? "pcaddr" : "-");
return str;
// Formats the enabled stack walking features as a comma-separated list.
//   str, size - caller-provided output buffer
//   f         - feature flags to describe
// Returns a pointer into `str` (past the leading comma), or an empty string
// literal when nothing was written.
static const char* getFeaturesString(char* str, size_t size, StackWalkFeatures f) {
// Every enabled feature contributes ",<name>"; disabled ones contribute nothing
int chars = snprintf(str, size, "%s%s%s%s%s%s%s",
f.stats ? ",stats" : "",
f.jnienv ? ",jnienv" : "",
f.agct ? ",agct" : "",
f.mixed ? ",mixed" : "",
f.vtable_target ? ",vtable" : "",
f.comp_task ? ",comptask" : "",
f.pc_addr ? ",pcaddr" : "");
// Non-empty output always starts with ',', which str + 1 skips
return chars > 0 ? str + 1 : "";
}
void flush(Buffer* buf) {
ssize_t result = write(_in_memory ? _memfd : _fd, buf->data(), buf->offset());
if (result > 0) {
atomicInc(_bytes_written, result);
atomicInc(_bytes_written, (u64)result);
}
buf->reset();
}
@@ -767,11 +588,11 @@ class Recording {
buf->putVar32(0);
buf->putVar32(0x7fffffff); // must not clash with JFR metadata ID, or 'jfr print' will break
std::vector<std::string>& strings = JfrMetadata::strings();
const Index& strings = JfrMetadata::strings();
buf->putVar32(strings.size());
for (int i = 0; i < strings.size(); i++) {
buf->putUtf8(strings[i].c_str());
}
strings.forEachOrdered([&] (size_t idx, const std::string& s) {
buf->putUtf8(s.c_str());
});
writeElement(buf, JfrMetadata::root());
@@ -808,6 +629,7 @@ class Recording {
}
void writeSettings(Buffer* buf, Arguments& args) {
assert(args._cstack < sizeof(SETTING_CSTACK) / sizeof(char*));
writeStringSetting(buf, T_ACTIVE_RECORDING, "version", PROFILER_VERSION);
writeStringSetting(buf, T_ACTIVE_RECORDING, "engine", Profiler::instance()->_engine->type());
writeStringSetting(buf, T_ACTIVE_RECORDING, "cstack", SETTING_CSTACK[args._cstack]);
@@ -816,12 +638,13 @@ class Recording {
writeStringSetting(buf, T_ACTIVE_RECORDING, "filter", args._filter);
writeStringSetting(buf, T_ACTIVE_RECORDING, "begin", args._begin);
writeStringSetting(buf, T_ACTIVE_RECORDING, "end", args._end);
writeListSetting(buf, T_ACTIVE_RECORDING, "include", args._buf, args._include);
writeListSetting(buf, T_ACTIVE_RECORDING, "exclude", args._buf, args._exclude);
writeListSetting(buf, T_ACTIVE_RECORDING, "include", args._include);
writeListSetting(buf, T_ACTIVE_RECORDING, "exclude", args._exclude);
writeIntSetting(buf, T_ACTIVE_RECORDING, "jstackdepth", args._jstackdepth);
writeIntSetting(buf, T_ACTIVE_RECORDING, "jfropts", args._jfr_options);
writeIntSetting(buf, T_ACTIVE_RECORDING, "chunksize", args._chunk_size);
writeIntSetting(buf, T_ACTIVE_RECORDING, "chunktime", args._chunk_time);
writeIntSetting(buf, T_ACTIVE_RECORDING, "memlimit", args._mem_limit);
char str[256];
writeStringSetting(buf, T_ACTIVE_RECORDING, "features", getFeaturesString(str, sizeof(str), args._features));
@@ -835,6 +658,12 @@ class Recording {
writeIntSetting(buf, T_EXECUTION_SAMPLE, "wall", args._wall);
writeBoolSetting(buf, T_EXECUTION_SAMPLE, "nobatch", args._nobatch);
}
if (args._nativemem >= 0) {
writeIntSetting(buf, T_MALLOC, "nativemem", args._nativemem);
}
if (args._nativelock >= 0) {
writeIntSetting(buf, T_NATIVE_LOCK, "nativelock", args._nativelock);
}
writeBoolSetting(buf, T_ALLOC_IN_NEW_TLAB, "enabled", args._alloc >= 0);
writeBoolSetting(buf, T_ALLOC_OUTSIDE_TLAB, "enabled", args._alloc >= 0);
@@ -849,6 +678,14 @@ class Recording {
writeIntSetting(buf, T_MONITOR_ENTER, "lock", args._lock);
}
writeBoolSetting(buf, T_METHOD_TRACE, "enabled", !args._trace.empty());
writeListSetting(buf, T_METHOD_TRACE, "trace", args._trace);
writeBoolSetting(buf, T_PROCESS_SAMPLE, "enabled", args._proc > 0);
if (args._proc > 0) {
writeIntSetting(buf, T_PROCESS_SAMPLE, "proc", args._proc);
}
writeBoolSetting(buf, T_ACTIVE_RECORDING, "debugSymbols", VM::loaded() && VMStructs::libjvm()->hasDebugSymbols());
writeBoolSetting(buf, T_ACTIVE_RECORDING, "kernelSymbols", Symbols::haveKernelSymbols());
}
@@ -874,10 +711,9 @@ class Recording {
writeStringSetting(buf, category, key, str);
}
void writeListSetting(Buffer* buf, int category, const char* key, const char* base, int offset) {
while (offset != 0) {
writeStringSetting(buf, category, key, base + offset);
offset = ((int*)(base + offset))[-1];
void writeListSetting(Buffer* buf, int category, const char* key, const std::vector<const char*>& list) {
for (const char* s : list) {
writeStringSetting(buf, category, key, s);
}
}
@@ -993,9 +829,11 @@ class Recording {
buf->putVar32(0);
buf->putVar32(1);
buf->putVar32(10);
buf->putVar32(11);
Lookup lookup(&_method_map, Profiler::instance()->classMap());
Index packages(1);
Index symbols(1);
Lookup lookup(&_method_map, Profiler::instance()->classMap(), &packages, &symbols, OUTPUT_JFR);
writeFrameTypes(buf);
writeThreadStates(buf);
writeGCWhen(buf);
@@ -1005,6 +843,10 @@ class Recording {
writeClasses(buf, &lookup);
writePackages(buf, &lookup);
writeSymbols(buf, &lookup);
writeUserEventTypes(buf);
// Write log levels last. The order does not affect the JFR's validity,
// but log levels have an easily-visible format that makes it easy
// to see if a JFR file has been accidentally truncated.
writeLogLevels(buf);
}
@@ -1150,38 +992,31 @@ class Recording {
writePoolHeader(buf, T_CLASS, classes.size());
for (std::map<u32, const char*>::const_iterator it = classes.begin(); it != classes.end(); ++it) {
const char* name = it->second;
buf->putVar32(it->first);
buf->putVar32(0); // classLoader
buf->putVar64(lookup->getSymbol(name) | _base_id);
buf->putVar64(lookup->getPackage(name) | _base_id);
buf->putVar64(lookup->_symbols->indexOf(it->second) | _base_id);
buf->putVar64(lookup->getPackage(it->second) | _base_id);
buf->putVar32(0); // access flags
flushIfNeeded(buf);
}
}
void writePackages(Buffer* buf, Lookup* lookup) {
std::map<u32, const char*> packages;
lookup->_packages.collect(packages);
writePoolHeader(buf, T_PACKAGE, packages.size());
for (std::map<u32, const char*>::const_iterator it = packages.begin(); it != packages.end(); ++it) {
buf->putVar64(it->first | _base_id);
buf->putVar64(lookup->getSymbol(it->second) | _base_id);
writePoolHeader(buf, T_PACKAGE, lookup->_packages->size());
lookup->_packages->forEachOrdered([&] (size_t idx, const std::string& s) {
buf->putVar64(idx | _base_id);
buf->putVar64(lookup->_symbols->indexOf(s) | _base_id);
flushIfNeeded(buf);
}
});
}
void writeSymbols(Buffer* buf, Lookup* lookup) {
std::map<u32, const char*> symbols;
lookup->_symbols.collect(symbols);
writePoolHeader(buf, T_SYMBOL, symbols.size());
for (std::map<u32, const char*>::const_iterator it = symbols.begin(); it != symbols.end(); ++it) {
writePoolHeader(buf, T_SYMBOL, lookup->_symbols->size());
lookup->_symbols->forEachOrdered([&] (size_t idx, const std::string& s) {
flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - MAX_STRING_LENGTH);
buf->putVar64(it->first | _base_id);
buf->putUtf8(it->second);
}
buf->putVar64(idx | _base_id);
buf->putUtf8(s.c_str());
});
}
void writeLogLevels(Buffer* buf) {
@@ -1193,6 +1028,18 @@ class Recording {
}
}
// Writes the pool of user event types: a pool header followed by one
// (id, name) pair per type collected from UserEvents.
void writeUserEventTypes(Buffer* buf) {
    std::map<u32, const char*> user_types;
    UserEvents::collect(user_types);

    writePoolHeader(buf, T_USER_EVENT_TYPE, user_types.size());

    for (const std::pair<const u32, const char*>& type : user_types) {
        // Leave room for a string of up to MAX_STRING_LENGTH before writing the entry
        flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - MAX_STRING_LENGTH);
        buf->putVar32(type.first);
        buf->putUtf8(type.second);
    }
}
void recordExecutionSample(Buffer* buf, int tid, u32 call_trace_id, ExecutionEvent* event) {
int start = buf->skip(1);
buf->put8(T_EXECUTION_SAMPLE);
@@ -1203,6 +1050,17 @@ class Recording {
buf->put8(start, buf->offset() - start);
}
// Appends a T_METHOD_TRACE event to `buf`.
// Field order: type, start time, duration, thread id, call trace id, method (always 0 for now).
void recordMethodTrace(Buffer* buf, int tid, u32 call_trace_id, MethodTraceEvent* event) {
// Reserve one byte for the event size; it is filled in at the end
int start = buf->skip(1);
buf->put8(T_METHOD_TRACE);
buf->putVar64(event->_start_time);
buf->putVar64(event->_duration);
buf->putVar32(tid);
buf->putVar32(call_trace_id);
buf->putVar32(0); // TODO: method
// Back-fill the size: number of bytes written since `start`
buf->put8(start, buf->offset() - start);
}
void recordWallClockSample(Buffer* buf, int tid, u32 call_trace_id, WallClockEvent* event) {
int start = buf->skip(1);
buf->put8(T_WALL_CLOCK_SAMPLE);
@@ -1211,6 +1069,7 @@ class Recording {
buf->putVar32(call_trace_id);
buf->putVar32(event->_thread_state);
buf->putVar32(event->_samples);
buf->putVar64(event->_time_span);
buf->put8(start, buf->offset() - start);
}
@@ -1250,6 +1109,59 @@ class Recording {
buf->put8(start, buf->offset() - start);
}
// Appends a T_USER_EVENT event to `buf`.
// Field order: type, start time, thread id, event type, payload as a byte string.
// The payload is truncated to ASPROF_MAX_JFR_EVENT_LENGTH bytes.
void recordUserEvent(Buffer* buf, int tid, UserEvent* event) {
// estimate of size of non-string fields of this event
const size_t event_non_string_size_limit = 64;
// When calling recordUserEvent, the buffer can be up to RECORDING_BUFFER_LIMIT bytes full.
// Check that the buffer is not exceeded.
static_assert(RECORDING_BUFFER_LIMIT + event_non_string_size_limit + ASPROF_MAX_JFR_EVENT_LENGTH
<= RECORDING_BUFFER_SIZE, "output must fit within recording buffer");
// Reserve five bytes for the event size, which is written as a var32 at the end
int start = buf->skip(5);
buf->put8(T_USER_EVENT);
buf->putVar64(event->_start_time);
buf->putVar32(tid);
buf->putVar32(event->_type);
buf->putByteString((const char*)event->_data,
event->_len > ASPROF_MAX_JFR_EVENT_LENGTH ? ASPROF_MAX_JFR_EVENT_LENGTH : event->_len);
// Back-fill the size: number of bytes written since `start`
buf->putVar32(start, buf->offset() - start);
}
// Appends a T_PROCESS_SAMPLE event describing `info` to `buf`.
// The fields are written in a fixed order that any reader of this event
// has to follow, so the sequence below must not be rearranged.
void recordProcessSample(Buffer* buf, const ProcessInfo* info) {
// Reserve five bytes for the event size, which is written as a var32 at the end
int start = buf->skip(5);
buf->put8(T_PROCESS_SAMPLE);
// The event time is taken now, not from `info`
buf->putVar64(TSC::ticks());
buf->putVar32(info->pid);
buf->putVar32(info->ppid);
buf->putByteString(info->name);
buf->putByteString(info->cmdline);
buf->putVar32(info->uid);
buf->put8(info->state);
buf->putVar64(info->start_time);
buf->putFloat(info->cpu_user);
buf->putFloat(info->cpu_system);
buf->putFloat(info->cpu_percent);
buf->putVar32(info->threads);
buf->putVar64(info->vm_size);
buf->putVar64(info->vm_rss);
buf->putVar64(info->rss_anon);
buf->putVar64(info->rss_files);
buf->putVar64(info->rss_shmem);
buf->putVar64(info->minor_faults);
buf->putVar64(info->major_faults);
buf->putVar64(info->io_read);
buf->putVar64(info->io_write);
// Back-fill the size: number of bytes written since `start`
buf->putVar32(start, buf->offset() - start);
}
void recordLiveObject(Buffer* buf, int tid, u32 call_trace_id, LiveObject* event) {
int start = buf->skip(1);
buf->put8(T_LIVE_OBJECT);
@@ -1289,6 +1201,17 @@ class Recording {
buf->put8(start, buf->offset() - start);
}
// Appends a T_NATIVE_LOCK event to `buf`.
// Field order: type, start time, duration, thread id, call trace id, address.
void recordNativeLockSample(Buffer* buf, int tid, u32 call_trace_id, NativeLockEvent* event) {
// Reserve one byte for the event size; it is filled in at the end
int start = buf->skip(1);
buf->put8(T_NATIVE_LOCK);
buf->putVar64(event->_start_time);
// The event stores start/end; the record stores start/duration
buf->putVar64(event->_end_time - event->_start_time);
buf->putVar32(tid);
buf->putVar32(call_trace_id);
buf->putVar64(event->_address);
// Back-fill the size: number of bytes written since `start`
buf->put8(start, buf->offset() - start);
}
void recordWindow(Buffer* buf, int tid, ProfilingWindow* event) {
int start = buf->skip(1);
buf->put8(T_WINDOW);
@@ -1417,6 +1340,8 @@ bool FlightRecorder::timerTick(u64 wall_time, u32 gc_id) {
_rec->cpuMonitorCycle();
_rec->heapMonitorCycle(gc_id);
_rec->processMonitorCycle(wall_time);
bool need_switch_chunk = _rec->needSwitchChunk(wall_time);
_rec_lock.unlockShared();
@@ -1437,7 +1362,7 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
jclass cls = env->DefineClass(JFR_SYNC_NAME, NULL, (const jbyte*)JFR_SYNC_CLASS, INCBIN_SIZEOF(JFR_SYNC_CLASS));
if (cls == NULL || env->RegisterNatives(cls, &native_method, 1) != 0
|| (_start_method = env->GetStaticMethodID(cls, "start", "(Ljava/lang/String;Ljava/lang/String;I)V")) == NULL
|| (_stop_method = env->GetStaticMethodID(cls, "stop", "()V")) == NULL
|| (_stop_method = env->GetStaticMethodID(cls, "stop", "()Z")) == NULL
|| (_box_method = env->GetStaticMethodID(cls, "box", "(I)Ljava/lang/Integer;")) == NULL
|| (_jfr_sync_class = (jclass)env->NewGlobalRef(cls)) == NULL) {
env->ExceptionDescribe();
@@ -1469,12 +1394,13 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
jobject jfilename = env->NewStringUTF(filename);
jobject jsettings = args._jfr_sync == NULL ? NULL : env->NewStringUTF(args._jfr_sync);
int event_mask = (args._event != NULL ? 1 : 0) |
(args._alloc >= 0 ? 2 : 0) |
(args._lock >= 0 ? 4 : 0) |
((args._jfr_options ^ JFR_SYNC_OPTS) << 4);
int event_mask = args.eventMask() |
((args._jfr_options ^ JFR_SYNC_OPTS) << EVENT_MASK_SIZE);
storeRelease(_jfr_starting, true);
env->CallStaticVoidMethod(_jfr_sync_class, _start_method, jfilename, jsettings, event_mask);
storeRelease(_jfr_starting, false);
if (env->ExceptionCheck()) {
env->ExceptionDescribe();
@@ -1486,7 +1412,9 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
void FlightRecorder::stopMasterRecording() {
JNIEnv* env = VM::jni();
env->CallStaticVoidMethod(_jfr_sync_class, _stop_method);
if (env->CallStaticBooleanMethod(_jfr_sync_class, _stop_method) == JNI_FALSE) {
Log::warn("Failed to stop JFR recording");
}
env->ExceptionClear();
}
@@ -1504,6 +1432,9 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
case INSTRUMENTED_METHOD:
_rec->recordExecutionSample(buf, tid, call_trace_id, (ExecutionEvent*)event);
break;
case METHOD_TRACE:
_rec->recordMethodTrace(buf, tid, call_trace_id, (MethodTraceEvent*)event);
break;
case WALL_CLOCK_SAMPLE:
_rec->recordWallClockSample(buf, tid, call_trace_id, (WallClockEvent*)event);
break;
@@ -1525,9 +1456,15 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
case PARK_SAMPLE:
_rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent*)event);
break;
case NATIVE_LOCK_SAMPLE:
_rec->recordNativeLockSample(buf, tid, call_trace_id, (NativeLockEvent*)event);
break;
case PROFILING_WINDOW:
_rec->recordWindow(buf, tid, (ProfilingWindow*)event);
break;
case USER_EVENT:
_rec->recordUserEvent(buf, tid, (UserEvent*)event);
break;
}
_rec->flushIfNeeded(buf);
_rec->addThread(tid);
@@ -1554,3 +1491,7 @@ void FlightRecorder::recordLog(LogLevel level, const char* message, size_t len)
_rec_lock.unlockShared();
}
// Returns the current value of the file-level _jfr_starting flag, read via loadAcquire.
bool FlightRecorder::isJfrStarting() {
return loadAcquire(_jfr_starting);
}

View File

@@ -21,6 +21,8 @@ class FlightRecorder {
void stopMasterRecording();
public:
static const LogLevel MIN_LOG_LEVEL = LogLevel::LOG_DEBUG;
FlightRecorder() : _rec(NULL) {
}
@@ -38,6 +40,8 @@ class FlightRecorder {
EventType event_type, Event* event);
void recordLog(LogLevel level, const char* message, size_t len);
static bool isJfrStarting();
};
#endif // _FLIGHTRECORDER_H

View File

@@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "callTraceStorage.h"
#include "demangle.h"
#include "frameName.h"
#include "profiler.h"
@@ -44,6 +45,8 @@ Matcher::Matcher(const Matcher& m) {
}
Matcher& Matcher::operator=(const Matcher& m) {
if (this == &m) return *this;
free(_pattern);
_type = m._type;
@@ -86,8 +89,8 @@ FrameName::FrameName(Arguments& args, int style, int epoch, Mutex& thread_names_
// Require printf to use standard C format regardless of system locale
_saved_locale = uselocale(newlocale(LC_NUMERIC_MASK, "C", (locale_t)0));
buildFilter(_include, args._buf, args._include);
buildFilter(_exclude, args._buf, args._exclude);
for (const char* s : args._include) _include.push_back(s);
for (const char* s : args._exclude) _exclude.push_back(s);
Profiler::instance()->classMap()->collect(_class_names);
}
@@ -109,13 +112,6 @@ FrameName::~FrameName() {
freelocale(uselocale(_saved_locale));
}
// Appends one entry to `vector` for every pattern reachable from `offset`.
// Patterns form a chain inside `base`: the int stored immediately before the
// text at `base + offset` holds the offset of the next pattern, and an offset
// of 0 terminates the chain.
void FrameName::buildFilter(std::vector<Matcher>& vector, const char* base, int offset) {
    for (int pos = offset; pos != 0; ) {
        const char* pattern = base + pos;
        vector.push_back(pattern);
        // Follow the link kept in the int that precedes the pattern text
        pos = ((int*)pattern)[-1];
    }
}
const char* FrameName::decodeNativeSymbol(const char* name) {
const char* lib_name = (_style & STYLE_LIB_NAMES) ? Profiler::instance()->getLibraryName(name) : NULL;
@@ -153,13 +149,9 @@ const char* FrameName::typeSuffix(FrameTypeId type) {
}
void FrameName::javaMethodName(jmethodID method) {
if (VMStructs::hasMethodStructs()) {
// Workaround for JDK-8313816
VMMethod* vm_method = VMMethod::fromMethodID(method);
if (vm_method == NULL || vm_method->id() == NULL) {
_str.assign("[stale_jmethodID]");
return;
}
if (VMMethod::isStaleMethodId(method)) {
_str.assign("[stale_jmethodID]");
return;
}
jclass method_class = NULL;
@@ -184,6 +176,8 @@ void FrameName::javaMethodName(jmethodID method) {
}
_str.append(method_sig);
}
} else if (err == JVMTI_ERROR_INVALID_METHODID) {
_str.assign("[stale_jmethodID]");
} else {
char buf[32];
snprintf(buf, sizeof(buf), "[jvmtiError %d]", err);
@@ -301,6 +295,13 @@ const char* FrameName::name(ASGCT_CallFrame& frame, bool for_matching) {
case BCI_ERROR:
return _str.assign("[").append((const char*)frame.method_id).append("]").c_str();
case BCI_CPU: {
int cpu = ((int)(uintptr_t)frame.method_id) & 0x7fff;
char buf[32];
snprintf(buf, sizeof(buf), "[CPU-%d]", cpu);
return _str.assign(buf).c_str();
}
default: {
const char* type_suffix = typeSuffix(FrameType::decode(frame.bci));
@@ -354,6 +355,7 @@ FrameTypeId FrameName::type(ASGCT_CallFrame& frame) {
case BCI_THREAD_ID:
case BCI_ADDRESS:
case BCI_ERROR:
case BCI_CPU:
return FRAME_NATIVE;
default:
@@ -378,3 +380,24 @@ bool FrameName::exclude(const char* frame_name) {
}
return false;
}
// Decides whether a call trace must be dropped according to the include/exclude lists.
// Returns true if any frame matches the exclude list, or if an include list exists
// and no frame matches it. Returns false when both lists are empty.
bool FrameName::excludeTrace(CallTrace* trace) {
    const bool has_exclude = !_exclude.empty();
    bool include_pending = !_include.empty();
    if (!include_pending && !has_exclude) {
        return false;
    }

    for (int idx = 0; idx < trace->num_frames; idx++) {
        const char* current = name(trace->frames[idx], true);
        if (has_exclude && exclude(current)) {
            return true;
        }
        if (!include_pending || !include(current)) {
            continue;
        }
        // Include requirement satisfied; keep scanning only if excludes can still veto
        include_pending = false;
        if (!has_exclude) {
            break;
        }
    }

    return include_pending;
}

View File

@@ -24,6 +24,7 @@ typedef std::map<jmethodID, std::string> JMethodCache;
typedef std::map<int, std::string> ThreadMap;
typedef std::map<unsigned int, const char*> ClassMap;
class CallTrace;
enum MatchType {
MATCH_EQUALS,
@@ -66,11 +67,12 @@ class FrameName {
ThreadMap& _thread_names;
locale_t _saved_locale;
void buildFilter(std::vector<Matcher>& vector, const char* base, int offset);
const char* decodeNativeSymbol(const char* name);
const char* typeSuffix(FrameTypeId type);
void javaMethodName(jmethodID method);
void javaClassName(const char* symbol, size_t length, int style);
bool include(const char* frame_name);
bool exclude(const char* frame_name);
public:
FrameName(Arguments& args, int style, int epoch, Mutex& thread_names_lock, ThreadMap& thread_names);
@@ -79,11 +81,7 @@ class FrameName {
const char* name(ASGCT_CallFrame& frame, bool for_matching = false);
FrameTypeId type(ASGCT_CallFrame& frame);
bool hasIncludeList() { return !_include.empty(); }
bool hasExcludeList() { return !_exclude.empty(); }
bool include(const char* frame_name);
bool exclude(const char* frame_name);
bool excludeTrace(CallTrace* trace);
};
#endif // _FRAMENAME_H

View File

@@ -13,5 +13,20 @@ public class Instrument {
private Instrument() {
}
public static native void recordSample();
public static native void recordEntry();
/**
 * Records a method exit only if at least {@code minLatency} nanoseconds
 * have elapsed since {@code startTimeNs}.
 */
public static void recordExit(long startTimeNs, long minLatency) {
    long elapsedNs = System.nanoTime() - startTimeNs;
    if (elapsedNs < minLatency) {
        return;
    }
    recordExit0(startTimeNs);
}
// Overload used when latency=0. It deliberately delegates to recordExit0
// instead of letting callers invoke recordExit0 directly, so that this path
// adds the same number of extra frames as the standard
// recordExit(startTimeNs, minLatency) path.
public static void recordExit(long startTimeNs) {
recordExit0(startTimeNs);
}
public static native void recordExit0(long startTimeNs);
}

View File

@@ -16,11 +16,27 @@ import java.nio.file.NoSuchFileException;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.StringTokenizer;
import java.util.concurrent.locks.LockSupport;
/**
* Synchronize async-profiler recording with an existing JFR recording.
*/
class JfrSync implements FlightRecorderListener {
// Keep in sync with EventMask
private static final int EM_CPU = 1;
private static final int EM_ALLOC = 2;
private static final int EM_LOCK = 4;
// Keep in sync with EVENT_MASK_SIZE in C++
private static final int EVENT_MASK_SIZE = 7;
// Keep in sync with JfrOption
private static final int NO_SYSTEM_INFO = 1;
private static final int NO_SYSTEM_PROPS = 2;
private static final int NO_NATIVE_LIBS = 4;
private static final int NO_CPU_LOAD = 8;
private static final int NO_HEAP_SUMMARY = 16;
private static volatile Recording masterRecording;
private JfrSync() {
@@ -62,46 +78,63 @@ class JfrSync implements FlightRecorderListener {
recording.start();
}
public static void stop() {
public static boolean stop() {
Recording recording = masterRecording;
if (recording != null) {
// Disable state change notification before stopping
masterRecording = null;
recording.stop();
try {
recording.stop();
} catch (IllegalStateException e) {
// Workaround the JDK issue: JFR shutdown hook may stop the recording concurrently
// then populate the target file outside the state lock.
// Once the file is completely written, the recording state is changed to CLOSED.
for (int pause = 10; recording.getState() != RecordingState.CLOSED && pause < 1000; pause *= 2) {
LockSupport.parkNanos(pause * 1_000_000L);
}
return recording.getState() == RecordingState.CLOSED;
}
}
return true;
}
private static void disableBuiltinEvents(Recording recording, int eventMask) {
if ((eventMask & 1) != 0) {
if ((eventMask & EM_CPU) != 0) {
recording.disable("jdk.ExecutionSample");
recording.disable("jdk.NativeMethodSample");
}
if ((eventMask & 2) != 0) {
if ((eventMask & EM_ALLOC) != 0) {
recording.disable("jdk.ObjectAllocationInNewTLAB");
recording.disable("jdk.ObjectAllocationOutsideTLAB");
recording.disable("jdk.ObjectAllocationSample");
recording.disable("jdk.OldObjectSample");
}
if ((eventMask & 4) != 0) {
if ((eventMask & EM_LOCK) != 0) {
recording.disable("jdk.JavaMonitorEnter");
recording.disable("jdk.ThreadPark");
}
// No built-in event related to EM_WALL
// No built-in event related to EM_NATIVEMEM
// No built-in event related to EM_NATIVELOCK
// No need to disable built-in event related to EM_METHOD_TRACE
eventMask >>>= EVENT_MASK_SIZE;
// Shifted JfrOption values
if ((eventMask & 0x10) != 0) {
if ((eventMask & NO_SYSTEM_INFO) != 0) {
recording.disable("jdk.OSInformation");
recording.disable("jdk.CPUInformation");
recording.disable("jdk.JVMInformation");
}
if ((eventMask & 0x20) != 0) {
if ((eventMask & NO_SYSTEM_PROPS) != 0) {
recording.disable("jdk.InitialSystemProperty");
}
if ((eventMask & 0x40) != 0) {
if ((eventMask & NO_NATIVE_LIBS) != 0) {
recording.disable("jdk.NativeLibrary");
}
if ((eventMask & 0x80) != 0) {
if ((eventMask & NO_CPU_LOAD) != 0) {
recording.disable("jdk.CPULoad");
}
if ((eventMask & 0x100) != 0) {
if ((eventMask & NO_HEAP_SUMMARY) != 0) {
recording.disable("jdk.GCHeapSummary");
}
}

Binary file not shown.

View File

@@ -17,7 +17,7 @@ import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicInteger;
class Server extends Thread implements Executor, HttpHandler {
private static final String[] COMMANDS = "start,resume,stop,dump,check,status,meminfo,list,version".split(",");
private static final String[] COMMANDS = "start,resume,stop,dump,status,metrics,list,version".split(",");
private final HttpServer server;
private final AtomicInteger threadNum = new AtomicInteger();

View File

@@ -12,7 +12,9 @@
#include "asprof.h"
#include "cpuEngine.h"
#include "mallocTracer.h"
#include "nativeLockTracer.h"
#include "profiler.h"
#include "symbols.h"
#define ADDRESS_OF(sym) ({ \
@@ -99,6 +101,7 @@ static void* dlopen_hook_impl(const char* filename, int flags, bool patch) {
Hooks::patchLibraries();
}
MallocTracer::installHooks();
NativeLockTracer::installHooks();
}
return result;
}
@@ -182,6 +185,11 @@ void Hooks::patchLibraries() {
while (_patched_libs < native_lib_count) {
CodeCache* cc = (*native_libs)[_patched_libs++];
UnloadProtection handle(cc);
if (!handle.isValid()) {
continue;
}
if (!cc->contains((const void*)Hooks::init)) {
// Let libasyncProfiler always use original dlopen
cc->patchImport(im_dlopen, (void*)dlopen_hook);

Some files were not shown because too many files have changed in this diff Show More