mirror of
https://github.com/async-profiler/async-profiler.git
synced 2026-04-28 10:53:49 +00:00
Compare commits
52 Commits
499904dce7
...
7bd911a007
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7bd911a007 | ||
|
|
2df2733d1d | ||
|
|
4d5441f2cd | ||
|
|
cc9e91bd8f | ||
|
|
e899de6a9c | ||
|
|
fbc3942095 | ||
|
|
6afb9572c1 | ||
|
|
f763e195ee | ||
|
|
f1b87ead07 | ||
|
|
4dda6c40af | ||
|
|
264b8ab5da | ||
|
|
c383a35ff4 | ||
|
|
82ae80a660 | ||
|
|
7e92b5cdac | ||
|
|
fe69e4fab2 | ||
|
|
d94581c24c | ||
|
|
f3c31942fb | ||
|
|
a246ced814 | ||
|
|
8d653dd5e0 | ||
|
|
cc0eab1789 | ||
|
|
842b612e08 | ||
|
|
ff4336d136 | ||
|
|
e1dd4c05f6 | ||
|
|
174dc31d88 | ||
|
|
dbd9fc7520 | ||
|
|
dc69cf4b80 | ||
|
|
abc8b7f493 | ||
|
|
4ea8e5bbb6 | ||
|
|
71ad47a46e | ||
|
|
0023021ddf | ||
|
|
444d0e6353 | ||
|
|
7e2ed0e77e | ||
|
|
68244fbf6f | ||
|
|
31042f13bc | ||
|
|
a3c6d92d39 | ||
|
|
036c87e50d | ||
|
|
15b1161f57 | ||
|
|
b02434bd9d | ||
|
|
9c293283f2 | ||
|
|
3aba5ee521 | ||
|
|
078935591f | ||
|
|
dc88d3f756 | ||
|
|
a071e8a2f8 | ||
|
|
6e6acc1769 | ||
|
|
709a777393 | ||
|
|
b9d6843ae5 | ||
|
|
3722d05ba0 | ||
|
|
872be63220 | ||
|
|
a89d7ddeba | ||
|
|
f789c4f748 | ||
|
|
d43d328b58 | ||
|
|
037c09906d |
@@ -3,34 +3,47 @@
|
||||
<head>
|
||||
<meta charset='utf-8'>
|
||||
<style>
|
||||
body {margin: 0; padding: 10px 10px 22px 10px; background-color: #ffffff}
|
||||
:root {--bg: #ffffff; --fg: #000000; --hl-bg: #ffffe0; --hl-border: #ffc000; --link: #0366d6; --legend-bg: #ffffe0; --legend-border: #666666}
|
||||
:root.dark {--bg: #1e1e1e; --fg: #cccccc; --hl-bg: #3a3a00; --hl-border: #8a7000; --link: #58a6ff; --legend-bg: #333333; --legend-border: #888888}
|
||||
body {margin: 0; padding: 10px 10px 22px 10px; background-color: var(--bg); color: var(--fg)}
|
||||
h1 {margin: 5px 0 0 0; font-size: 18px; font-weight: normal; text-align: center}
|
||||
header {margin: -22px 0 6px 0}
|
||||
button {border: none; background: none; width: 24px; height: 24px; cursor: pointer; margin: 0; padding: 2px 0 0 0; text-align: center}
|
||||
button:hover {background-color: #ffffe0; outline: 1px solid #ffc000; border-radius: 4px}
|
||||
button:hover {background-color: var(--hl-bg); outline: 1px solid var(--hl-border); border-radius: 4px}
|
||||
dl {margin: 0 4px 8px 4px}
|
||||
dt {margin: 1px; padding: 2px 0; font-weight: bold}
|
||||
dd {margin: 1px; padding: 2px 4px}
|
||||
dl.frames {float: left; width: 160px}
|
||||
dl.hotkeys {clear: left; border-top: 1px solid #666666}
|
||||
dl.frames > dd {color: #000000}
|
||||
dl.hotkeys {clear: left; border-top: 1px solid var(--legend-border)}
|
||||
dl.hotkeys > dt {float: left; clear: left; width: 158px; margin-right: 4px; text-align: right}
|
||||
dl.hotkeys > dd {float: left}
|
||||
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid #ffc000; display: none; overflow: hidden; white-space: nowrap; background-color: #ffffe0}
|
||||
a {color: #0366d6}
|
||||
#legend {padding: 4px; border-radius: 4px; background: #ffffe0; border: 1px solid #666666; display: none}
|
||||
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: #ffffe0; outline: 1px solid #ffc000; height: 15px}
|
||||
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid var(--hl-border); display: none; overflow: hidden; white-space: nowrap; background-color: var(--hl-bg); color: var(--fg)}
|
||||
a {color: var(--link)}
|
||||
#legend {padding: 4px; border-radius: 4px; background: var(--legend-bg); border: 1px solid var(--legend-border); display: none}
|
||||
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: var(--hl-bg); outline: 1px solid var(--hl-border); height: 15px}
|
||||
#hl span {padding: 0 3px 0 3px}
|
||||
#status {left: 0}
|
||||
#match {right: 0}
|
||||
#reset {cursor: pointer}
|
||||
#canvas {width: 100%; height: 576px}
|
||||
</style>
|
||||
<script>
|
||||
{
|
||||
let theme;
|
||||
try { theme = localStorage.getItem('flame-theme'); } catch (ignored) {}
|
||||
if (theme ? theme === 'dark' : matchMedia('(prefers-color-scheme: dark)').matches) {
|
||||
document.documentElement.classList.add('dark');
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body style='font: 12px Verdana, sans-serif'>
|
||||
<h1>CPU profile</h1>
|
||||
<header style='float: left'>
|
||||
<button id='inverted' title='Invert (I)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 392 392'><path d='M196,36 L316,156 L76,156 Z' fill='#004d80'/><path d='M196,356 L76,236 L316,236 Z' fill='#004d80'/><path d='M196,54 L298,156 L94,156 Z' fill='#ff8d40'/><path d='M196,338 L94,236 L298,236 Z' fill='#40b2ff'/><rect x='94' y='188' width='204' height='16' fill='#004d80'/></svg></button>
|
||||
<button id='search' title='Search (Ctrl+F)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='-39.3 -39.3 471.1 471.1'><circle cx='147.7' cy='147.8' r='125.9' fill='#fff'/><path fill='#40b2ff' d='M370.7 348.7c0 1.4-1.6 6.3-7.2 12.3-6.2 6.7-12.5 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6z'/><path fill='#ff8d40' d='M208.7 86.9l-14.5 14.5c-17.1 17.1-46.5 5-46.5-19.3V61.6c-49 0-88.4 40.8-86.1 90.2 2 43.9 38.1 80 82 82 49.5 2.3 90.2-37.2 90.2-86.1 0-23.7-9.6-45.2-25.1-60.8z'/><path fill='#004d80' d='M276.1 221c12.3-21.5 19.5-46.5 19.5-73.2C295.6 66.3 229.2.1 147.7.1S0 66.3 0 147.9s66.3 147.7 147.7 147.7c26.6 0 51.5-7.1 73.2-19.5 39.8 53.3 91.9 113.5 126.1 116.4 12.3.5 22.9-6.7 32.8-16.7 5.2-5.6 13.8-16.9 12.8-28.8-2.9-34.1-63.1-86.2-116.4-126.1zM147.7 273.8c-69.5 0-125.9-56.5-125.9-125.9S78.3 21.9 147.7 21.9 273.6 78.4 273.6 147.8s-56.4 126-125.9 126zm215.9 87.2c-6.2 6.7-12.4 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6 0 1.4-1.6 6.3-7.2 12.4z'/></svg></button>
|
||||
<button id='darkmode' title='Toggle dark mode (D)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><path d='M10 4a6 6 0 0 1 0 12z' fill='#ff8d40'/><path d='M10 4a6 6 0 0 0 0 12z' fill='#ffffff'/><circle cx='10' cy='10' r='8' fill='none' stroke='#004d80'/></svg></button>
|
||||
<button id='info'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><circle cx='10' cy='10' r='8' stroke='#004d80' fill='none'/><path d='M10 5.5c-1.25 0-2.25 1-2.25 2.25H9a1.25 1.25 0 0 1 2.5 0c0 .65-.55 1-1 1.2-.7.35-1.25.85-1.25 1.8V11h1.5v-.25c0-.37.29-.65.68-.83.73-.34 1.32-.87 1.32-2.17 0-1.25-1.5-2.25-2.75-2.25' fill='#ff8d40' stroke='#ff8d40' stroke-width='.6' stroke-linecap='round' stroke-linejoin='round'/><circle cx='10' cy='13.5' r='1.2' fill='#ff8d40'/></svg></button>
|
||||
</header>
|
||||
<header style='float: right'>Produced by <a href='https://github.com/async-profiler/async-profiler'>async-profiler</a></header>
|
||||
@@ -57,7 +70,7 @@
|
||||
</dl>
|
||||
<dl class='hotkeys'>
|
||||
<dt>Click frame</dt><dd>Zoom into frame</dd>
|
||||
<dt>Alt+Click</dt><dd>Remove stack</dd>
|
||||
<dt>Ctrl/Alt+Click</dt><dd>Remove stack</dd>
|
||||
<dt>0</dt><dd>Reset zoom</dd>
|
||||
<dt>I</dt><dd>Invert graph</dd>
|
||||
<dt>Ctrl+F</dt><dd>Search</dd>
|
||||
@@ -75,9 +88,11 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
'use strict';
|
||||
let root, px, pattern;
|
||||
let level0 = 0, left0 = 0, width0 = 0;
|
||||
let level0 = 0, left0 = 0, width0 = 0, d = 0;
|
||||
let nav = [], navIndex, matchval;
|
||||
let inverted = false;
|
||||
const U = undefined;
|
||||
const maxdiff = -1;
|
||||
const levels = Array(36);
|
||||
for (let h = 0; h < levels.length; h++) {
|
||||
levels[h] = [];
|
||||
@@ -111,10 +126,18 @@
|
||||
return '#' + (p[0] + ((p[1] * v) << 16 | (p[2] * v) << 8 | (p[3] * v))).toString(16);
|
||||
}
|
||||
|
||||
function getDiffColor(diff) {
|
||||
if (diff === U) return '#ffdd33';
|
||||
if (diff === 0) return '#e0e0e0';
|
||||
const v = Math.round(128 * (maxdiff - Math.abs(diff)) / maxdiff) + 96;
|
||||
return diff > 0 ? 'rgb(255,' + v + ',' + v + ')' : 'rgb(' + v + ',' + v + ',255)';
|
||||
}
|
||||
|
||||
function f(key, level, left, width, inln, c1, int) {
|
||||
levels[level0 = level].push({level, left: left0 += left, width: width0 = width || width0,
|
||||
color: getColor(palette[key & 7]), title: cpool[key >>> 3],
|
||||
details: (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
|
||||
color: maxdiff >= 0 ? getDiffColor(d) : getColor(palette[key & 7]),
|
||||
title: cpool[key >>> 3],
|
||||
details: (d ? (d > 0 ? ', +' : ', ') + d : '') + (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
|
||||
});
|
||||
}
|
||||
|
||||
@@ -186,8 +209,10 @@
|
||||
}
|
||||
|
||||
function render(newRoot, nav) {
|
||||
const bg = getComputedStyle(document.documentElement).getPropertyValue('--bg');
|
||||
|
||||
if (root) {
|
||||
c.fillStyle = '#ffffff';
|
||||
c.fillStyle = bg;
|
||||
c.fillRect(0, 0, canvasWidth, canvasHeight);
|
||||
}
|
||||
|
||||
@@ -229,7 +254,7 @@
|
||||
}
|
||||
|
||||
if (f.level < root.level) {
|
||||
c.fillStyle = 'rgba(255, 255, 255, 0.5)';
|
||||
c.fillStyle = bg + '80';
|
||||
c.fillRect((f.left - x0) * px, y, f.width * px, 15);
|
||||
}
|
||||
}
|
||||
@@ -266,7 +291,7 @@
|
||||
canvas.title = f.title + '\n(' + samples(f.width) + f.details + ', ' + pct(f.width, levels[0][0].width) + '%)';
|
||||
canvas.style.cursor = 'pointer';
|
||||
canvas.onclick = function() {
|
||||
if (event.altKey && h >= root.level && h > 0) {
|
||||
if ((event.altKey || event.ctrlKey) && h >= root.level && h > 0) {
|
||||
removeStack(f.left, f.width);
|
||||
root.width > f.width ? render(root) : render();
|
||||
} else if (f !== root) {
|
||||
@@ -307,6 +332,12 @@
|
||||
search(false);
|
||||
}
|
||||
|
||||
document.getElementById('darkmode').onclick = function() {
|
||||
const theme = document.documentElement.classList.toggle('dark') ? 'dark' : 'light';
|
||||
try { localStorage.setItem('flame-theme', theme); } catch (ignored) {}
|
||||
render(root);
|
||||
}
|
||||
|
||||
const btnInfo = document.getElementById('info');
|
||||
const legend = document.getElementById('legend');
|
||||
|
||||
@@ -338,6 +369,9 @@
|
||||
canvas.onmouseout();
|
||||
document.getElementById('inverted').onclick();
|
||||
return false;
|
||||
} else if (event.key === 'd') {
|
||||
document.getElementById('darkmode').onclick();
|
||||
return false;
|
||||
} else if (event.key === '0') {
|
||||
canvas.onmouseout();
|
||||
root = levels[0][0];
|
||||
|
||||
BIN
.assets/images/flamegraph_diff.png
Normal file
BIN
.assets/images/flamegraph_diff.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
2
.github/workflows/integ.yml
vendored
2
.github/workflows/integ.yml
vendored
@@ -108,7 +108,7 @@ jobs:
|
||||
make build/test.jar
|
||||
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/bin build
|
||||
cp -r ${{ steps.extract_artifact.outputs.release_directory }}/lib build
|
||||
make test-java RETRY_COUNT=${{ inputs.retry-count }} -j
|
||||
make test-java TEST_THREADS=2 RETRY_COUNT=${{ inputs.retry-count }} -j
|
||||
- name: Upload integration test logs
|
||||
uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
|
||||
@@ -3,6 +3,7 @@ name: CI
|
||||
on: # We are very liberal in terms of triggering builds. This should be revisited if we start seeing a lot of queueing
|
||||
- push
|
||||
- pull_request
|
||||
- workflow_dispatch
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -52,7 +53,7 @@ jobs:
|
||||
|
||||
integ-linux-x64:
|
||||
name: integ / linux-x64
|
||||
needs: build-linux-x64
|
||||
needs: [build-linux-x64, build-jars]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -95,7 +96,7 @@ jobs:
|
||||
|
||||
integ-linux-arm64:
|
||||
name: integ / linux-arm64
|
||||
needs: build-linux-arm64
|
||||
needs: [build-linux-arm64, build-jars]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -115,7 +116,7 @@ jobs:
|
||||
|
||||
integ-macos:
|
||||
name: integ / macos
|
||||
needs: build-macos
|
||||
needs: [build-macos, build-jars]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -146,7 +147,7 @@ jobs:
|
||||
contents: write
|
||||
name: publish (nightly)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-jars, integ-linux-x64, integ-linux-arm64, integ-macos]
|
||||
needs: [integ-linux-x64, integ-linux-arm64, integ-macos]
|
||||
steps:
|
||||
- name: Download async-profiler binaries and jars
|
||||
uses: actions/download-artifact@v4
|
||||
|
||||
@@ -15,6 +15,7 @@ header:
|
||||
- 'src/jattach'
|
||||
- 'src/res'
|
||||
- '**/MANIFEST.MF'
|
||||
- 'test/**/*.collapsed'
|
||||
license:
|
||||
content: |
|
||||
Copyright The async-profiler authors
|
||||
|
||||
75
CHANGELOG.md
75
CHANGELOG.md
@@ -1,5 +1,80 @@
|
||||
# Changelog
|
||||
|
||||
## [4.4]
|
||||
|
||||
### Features
|
||||
|
||||
- #1553: Differential Flame Graphs
|
||||
|
||||
### Improvements
|
||||
|
||||
- #1705: `memlimit` option to limit size of the call trace storage
|
||||
- #1706: Extend syntax of `-j` option to truncate deep stacks
|
||||
- #1720: FlameGraph: Dark mode toggle
|
||||
- #1672: FlameGraph: Use Ctrl+Click in addition to Alt+Click to remove stacks
|
||||
- #1684: Unwind ARM64 generated stubs on JDK 26+
|
||||
- #1676: Make `dwarf` stack walking mode an alias for `vm`
|
||||
- #1671: An option to select TLAB based AllocTracer engine with JDK 11+
|
||||
- #1670: Move converter Main class to the one.convert package
|
||||
- #1660: Provide non-aggregated samples in OTLP converter
|
||||
- #1701, #1682: Speed-up stack walking
|
||||
|
||||
### Breaking changes
|
||||
|
||||
- #1673: Permanently remove `check` command
|
||||
- #1675: Remove unsafe AsyncGetCallTrace recovery tricks along with `safemode` option
|
||||
- #1677: Remove `cstack=lbr` option
|
||||
|
||||
### Bug fixes
|
||||
|
||||
- #1716: Wall-clock Heatmap does not count samples correctly
|
||||
- #1715: Fix Zing crash when profiling cpu+wall together
|
||||
- #1708: Another fix for correct vDSO unwinding on ARM64
|
||||
- #1707: Workaround for JFR shutdown race
|
||||
- #1699: Allow negative keys in JFR constant pool
|
||||
- #1697: Ensure remaining buffer is sufficient for event data in JfrReader
|
||||
- #1654: Prefer perf-events engine when record-cpu or target-cpu are selected
|
||||
- #1585: Scale perf counters in case of multiplexing
|
||||
- #1528: Add a hard-coded limit on the maximum number of jmethodIDs
|
||||
- Do not walk past virtual thread continuation barriers
|
||||
|
||||
## [4.3] - 2026-01-20
|
||||
|
||||
### Features
|
||||
|
||||
- #1547: Native lock profiling
|
||||
- #1566: Filter cpu/wall profiles by latency
|
||||
- #1568: Expose async-profiler metrics in Prometheus format
|
||||
- #1628: async-profiler.jar as Java agent; remote control via JMX
|
||||
|
||||
### Improvements
|
||||
|
||||
- #1140: FlameGraph improvements: legend, hot keys, new toolbar icons
|
||||
- #1530: Timezone switcher between Local and UTC time in Heatmaps
|
||||
- #1582: Support `--include`/`--exclude` options for JFR to Heatmap/OTLP/pprof conversion
|
||||
- #1624: Compatibility with OTLP v1.9.0
|
||||
- #1629: Harden crash protection in StackWalker
|
||||
|
||||
### Breaking changes
|
||||
|
||||
- #1277: New `timeSpan` field in WallClockSample events
|
||||
- #1518: Deprecate `check` command
|
||||
- #1590: Support compilation on modern JDKs. Drop JDK 7 support
|
||||
|
||||
### Bug fixes
|
||||
|
||||
- #1599: Workaround for the kernel PERF_EVENT_IOC_REFRESH bug
|
||||
- #1596: Do not block any signals during execution of a custom crash handler
|
||||
- #1584: JfrReader loops on corrupted recordings
|
||||
- #1555: Parse FlameGraph title from HTML input
|
||||
- #1621: `loop` and `timeout` options do not work together
|
||||
- #1641: Unwind vDSO correctly on Linux-ARM64
|
||||
- #1648: Fix stop sequence in Profiler::start
|
||||
- #1575: Fix CodeCache memory leak in lock profiling while looping
|
||||
- #1558: Fix record-cpu bug when kernel stacks are not available
|
||||
- #1651: Do not record CPU frame for non-perf samples
|
||||
- #1614, #1615, #1617, #1623: Fix races related to VM termination
|
||||
|
||||
## [4.2.1] - 2025-11-22
|
||||
|
||||
### Bug fixes
|
||||
|
||||
10
Makefile
10
Makefile
@@ -1,4 +1,4 @@
|
||||
PROFILER_VERSION ?= 4.2.1
|
||||
PROFILER_VERSION ?= 4.4
|
||||
|
||||
ifeq ($(COMMIT_TAG),true)
|
||||
PROFILER_VERSION := $(PROFILER_VERSION)-$(shell git rev-parse --short=8 HEAD)
|
||||
@@ -62,7 +62,8 @@ LOG_DIR=build/test/logs
|
||||
LOG_LEVEL=
|
||||
SKIP=
|
||||
RETRY_COUNT=0
|
||||
TEST_FLAGS=-DlogDir=$(LOG_DIR) -DlogLevel=$(LOG_LEVEL) -Dskip='$(subst $(COMMA), ,$(SKIP))' -DretryCount=$(RETRY_COUNT)
|
||||
TEST_THREADS ?= 8
|
||||
TEST_FLAGS=-DlogDir=$(LOG_DIR) -DlogLevel=$(LOG_LEVEL) -Dskip='$(subst $(COMMA), ,$(SKIP))' -DretryCount=$(RETRY_COUNT) -DthreadCount=$(TEST_THREADS)
|
||||
|
||||
# always sort SOURCES so zInit is last.
|
||||
SOURCES := $(sort $(wildcard src/*.cpp))
|
||||
@@ -99,7 +100,8 @@ ifeq ($(OS),Darwin)
|
||||
MERGE=false
|
||||
endif
|
||||
else
|
||||
CXXFLAGS += -U_FORTIFY_SOURCE -Wl,-z,defs -Wl,--exclude-libs,ALL -static-libstdc++ -static-libgcc -fdata-sections -ffunction-sections -Wl,--gc-sections -ggdb -Wunused-variable
|
||||
CXXFLAGS += -U_FORTIFY_SOURCE -Wl,-z,defs -Wl,--exclude-libs,ALL -static-libstdc++ -static-libgcc
|
||||
CXXFLAGS += -fdata-sections -ffunction-sections -Wl,--gc-sections -ggdb -Wunused-variable -Wno-psabi
|
||||
ifeq ($(MERGE),true)
|
||||
CXXFLAGS += -fwhole-program
|
||||
endif
|
||||
@@ -206,7 +208,7 @@ build/$(API_JAR): $(API_SOURCES) $(JAR_MANIFEST)
|
||||
build/$(CONVERTER_JAR): $(CONVERTER_SOURCES) $(RESOURCES)
|
||||
mkdir -p build/converter
|
||||
$(JAVAC) $(JAVAC_OPTIONS) -d build/converter $(CONVERTER_SOURCES)
|
||||
$(JAR) cfe $@ Main -C build/converter . -C src/res .
|
||||
$(JAR) cfe $@ one.convert.Main -C build/converter . -C src/res .
|
||||
$(RM) -r build/converter
|
||||
|
||||
%.class: %.java
|
||||
|
||||
10
README.md
10
README.md
@@ -23,12 +23,12 @@ to learn about more features.
|
||||
|
||||
# Download
|
||||
|
||||
### Stable release: [4.2.1](https://github.com/async-profiler/async-profiler/releases/tag/v4.2.1)
|
||||
### Stable release: [4.3](https://github.com/async-profiler/async-profiler/releases/tag/v4.3)
|
||||
|
||||
- Linux x64: [async-profiler-4.2.1-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.2.1/async-profiler-4.2.1-linux-x64.tar.gz)
|
||||
- Linux arm64: [async-profiler-4.2.1-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.2.1/async-profiler-4.2.1-linux-arm64.tar.gz)
|
||||
- macOS arm64/x64: [async-profiler-4.2.1-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v4.2.1/async-profiler-4.2.1-macos.zip)
|
||||
- Profile converters: [jfr-converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v4.2.1/jfr-converter.jar)
|
||||
- Linux x64: [async-profiler-4.3-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-x64.tar.gz)
|
||||
- Linux arm64: [async-profiler-4.3-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-linux-arm64.tar.gz)
|
||||
- macOS arm64/x64: [async-profiler-4.3-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v4.3/async-profiler-4.3-macos.zip)
|
||||
- Profile converters: [jfr-converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v4.3/jfr-converter.jar)
|
||||
|
||||
### Nightly builds
|
||||
|
||||
|
||||
@@ -2,15 +2,17 @@
|
||||
|
||||
async-profiler provides `jfrconv` utility to convert between different profile output formats.
|
||||
`jfrconv` can be found at the same location as the `asprof` binary. Converter is also available
|
||||
as a standalone Java application: [`jfr-converter.jar`](https://github.com/async-profiler/async-profiler/releases/download/v4.2.1/jfr-converter.jar).
|
||||
as a standalone Java application: [`jfr-converter.jar`](https://github.com/async-profiler/async-profiler/releases/latest/download/jfr-converter.jar).
|
||||
|
||||
## Supported conversions
|
||||
|
||||
| Source | html | collapsed | pprof | pb.gz | heatmap | otlp |
|
||||
| --------- | ---- | --------- | ----- | ----- | ------- | ---- |
|
||||
| jfr | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| html | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| collapsed | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
The tool can convert several source formats into various outputs. The conversion capabilities are summarized below:
|
||||
|
||||
| Source format | to html | to collapsed | to pprof | to pb.gz | to heatmap | to otlp |
|
||||
| ------------- | ------- | ------------ | -------- | -------- | ---------- | ------- |
|
||||
| jfr | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| html | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| collapsed | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -43,6 +45,8 @@ Conversion options:
|
||||
|
||||
# otlp: OpenTelemetry profile format.
|
||||
|
||||
Differential Flame Graph:
|
||||
--diff <base-profile> <new-profile>
|
||||
|
||||
JFR options:
|
||||
--cpu Generate only CPU profile during conversion
|
||||
@@ -120,7 +124,7 @@ jfrconv --cpu foo.jfr
|
||||
|
||||
for HTML output as HTML is the default format for conversion from JFR.
|
||||
|
||||
#### Flame Graph options
|
||||
### Flame Graph options
|
||||
|
||||
To add a custom title to the generated Flame Graph, use `--title`, which has the default value `Flame Graph`:
|
||||
|
||||
@@ -128,9 +132,37 @@ To add a custom title to the generated Flame Graph, use `--title`, which has the
|
||||
jfrconv --cpu foo.jfr foo.html -r --title "Custom Title"
|
||||
```
|
||||
|
||||
### Other formats
|
||||
### Differential Flame Graph
|
||||
|
||||
`jfrconv` supports converting a JFR file to `collapsed`, `pprof`, `pb.gz` and `heatmap` formats as well.
|
||||
To find performance regressions, it may be useful to compare the current profile
|
||||
to a previous one that serves as a baseline. Differential Flame Graph
|
||||
visualizes such a comparison with a special color scheme:
|
||||
|
||||
- Red color denotes frames with more samples compared to the baseline (i.e. regression);
|
||||
- Blue is for frames with fewer samples;
|
||||
- Yellow are new frames that were absent in the baseline.
|
||||
|
||||
The more intense the color, the larger the delta.
|
||||
For each different frame, the delta value is displayed in a tooltip.
|
||||
|
||||

|
||||
|
||||
Differential Flame Graph takes the shape of the current profile:
|
||||
all frames have exactly the same size as in the normal Flame Graph.
|
||||
This means that frames that exist only in the base profile will not be visible.
|
||||
To see such frames, create another differential Flame Graph,
|
||||
swapping the base and the current input file.
|
||||
|
||||
To create a differential Flame Graph, run `jfrconv --diff` with two input files:
|
||||
the baseline profile and the new profile. Both files can be in JFR, HTML, or collapsed format.
|
||||
Other converter options work as usual.
|
||||
|
||||
```
|
||||
jfrconv --cpu --diff baseline.jfr new.jfr diff.html
|
||||
```
|
||||
|
||||
Output file name is optional. If omitted, `jfrconv` takes the name
|
||||
of the second input file, replacing its extension with `.diff.html`.
|
||||
|
||||
## Standalone converter examples
|
||||
|
||||
|
||||
@@ -10,6 +10,10 @@ process requires setting two kernel parameters. You can set them using sysctl as
|
||||
# sysctl kernel.kptr_restrict=0
|
||||
```
|
||||
|
||||
For better profiling accuracy, it is [recommended](Troubleshooting.md#known-limitations)
|
||||
to start the JVM with `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` flags,
|
||||
unless async-profiler is loaded at JVM startup.
|
||||
|
||||
## Find a process to profile
|
||||
|
||||
Common ways to find the target process include using
|
||||
|
||||
@@ -9,10 +9,17 @@ it is possible to attach async-profiler as an agent on the command line. For exa
|
||||
$ java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,file=profile.html ...
|
||||
```
|
||||
|
||||
On macOS, the library name is `libasyncProfiler.dylib` instead of `libasyncProfiler.so`.
|
||||
|
||||
Agent library is configured through the JVMTI argument interface.
|
||||
The format of the arguments string is described
|
||||
[in the source code](https://github.com/async-profiler/async-profiler/blob/v4.2.1/src/arguments.cpp#L39).
|
||||
`asprof` actually converts command line arguments to that format.
|
||||
The argument string is a comma-separated list of [profiler options](ProfilerOptions.md):
|
||||
|
||||
```
|
||||
option[=value],option[=value]...
|
||||
```
|
||||
|
||||
`asprof` internally converts command line arguments to the above format and attaches
|
||||
`libasyncProfiler.so` agent to a running process.
|
||||
|
||||
Another important use of attaching async-profiler as an agent is for continuous profiling.
|
||||
|
||||
|
||||
@@ -22,16 +22,19 @@ The below options are `action`s for async-profiler and common for both `asprof`
|
||||
| `metrics` | Print profiler metrics in Prometheus format. |
|
||||
| `list` | Show the list of profiling events available for the target process specified with PID. |
|
||||
|
||||
## Options applicable to any output format
|
||||
## General options
|
||||
|
||||
| asprof | Launch as agent | Description |
|
||||
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
|
||||
| `-f FILENAME` | `file=FILENAME` | The file name to dump the profile information to.<br>`%p` in the file name is expanded to the PID of the target JVM;<br>`%t` - to the timestamp;<br>`%n{MAX}` - to the sequence number;<br>`%{ENV}` - to the value of the given environment variable.<br>Example: `asprof -o collapsed -f /tmp/traces-%t.txt 8983` |
|
||||
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>` |
|
||||
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
|
||||
| `--loop TIME` | `loop=TIME` | Run profiler in a loop (continuous profiling). The argument is either a clock time (`hh:mm:ss`) or a loop duration in `s`econds, `m`inutes, `h`ours, or `d`ays. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration.<br>Example: `asprof --loop 1h -f /var/log/profile-%t.jfr 8983` |
|
||||
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Profiling Modes](ProfilingModes.md) for additional information. |
|
||||
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time in nanoseconds. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events. Time intervals may be followed by `s` for seconds, `ms` for milliseconds, `us` for microseconds or `ns` for nanoseconds.<br>Example: `asprof -e cpu -i 5ms 8983` |
|
||||
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
|
||||
| `--tlab` | `tlab` | Use TLAB events for allocation profiling |
|
||||
| `--live` | `live` | Retain allocation samples with live objects only (objects that have not been collected by the end of the profiling session). Useful for finding Java heap memory leaks. |
|
||||
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified, is the interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
|
||||
| `--nofree` | `nofree` | Will not record free calls in native memory allocation profiling. This is relevant when tracking memory leaks is not important and there are lots of free calls. |
|
||||
@@ -39,24 +42,24 @@ The below options are `action`s for async-profiler and common for both `asprof`
|
||||
| `--lock TIME` | `lock=TIME` | In lock profiling mode, sample contended locks whenever total lock wait time overflows the specified threshold. |
|
||||
| `--nativelock TIME` | `nativelock=TIME` | In native lock profiling mode, sample contended pthread locks (mutex/rwlock) whenever total lock wait time overflows the specified threshold. |
|
||||
| `--wall INTERVAL` | `wall=INTERVAL` | Wall clock profiling interval. Use this option instead of `-e wall` to enable wall clock profiling with another event, typically `cpu`.<br>Example: `asprof -e cpu --wall 100ms -f combined.jfr 8983`. |
|
||||
| `--proc INTERVAL` | `proc=INTERVAL` | Collect statistics about other processes in the system. Default sampling interval is 30s. |
|
||||
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983` |
|
||||
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
|
||||
| `-X PATTERN` | `exclude=PATTERN` | Filter stack traces by the given pattern(s). `-X` defines the name pattern that _must not_ occur in any of the stack traces in the output. `-X` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -X '*Unsafe.park*' 8983` |
|
||||
| `-L level` | `loglevel=level` | Log level: `debug`, `info`, `warn`, `error` or `none`. |
|
||||
| `--nobatch` | `nobatch` | Disable wall clock profiling optimization. Async-profiler will emit one `jdk.ExecutionSample` event for each wall clock sample instead of batching them in a custom `profiler.WallClockSample` event. |
|
||||
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983`<br>The argument may include two numbers separated by `/` (e.g. `200/40`). In this case, stack traces deeper than 200 frames will be truncated to the top 40 frames. This can be useful to prevent a deep recursion from bloating the profile. |
|
||||
| `-F features` | `features=LIST` | Comma separated (or `+` separated when launching as an agent) list of stack walking features. Supported features are:<ul><li>`stats` - log stack walking performance stats.</li><li>`vtable` - display targets of megamorphic virtual calls as an extra frame on top of `vtable stub` or `itable stub`.</li><li>`comptask` - display current compilation task (a Java method being compiled) in a JIT compiler stack trace.</li><li>`pcaddr` - display instruction addresses.</li></ul>More details [here](AdvancedStacktraceFeatures.md). |
|
||||
| `-f FILENAME` | `file` | The file name to dump the profile information to.<br>`%p` in the file name is expanded to the PID of the target JVM;<br>`%t` - to the timestamp;<br>`%n{MAX}` - to the sequence number;<br>`%{ENV}` - to the value of the given environment variable.<br>Example: `asprof -o collapsed -f /tmp/traces-%t.txt 8983` |
|
||||
| `--loop TIME` | `loop=TIME` | Run profiler in a loop (continuous profiling). The argument is either a clock time (`hh:mm:ss`) or a loop duration in `s`econds, `m`inutes, `h`ours, or `d`ays. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration.<br>Example: `asprof --loop 1h -f /var/log/profile-%t.jfr 8983` |
|
||||
| `-L level` | `loglevel=level` | Log level: `debug`, `info`, `warn`, `error` or `none`. |
|
||||
| N/A | `log=FILENAME` | Dedicated file for log messages. Used internally by asprof. |
|
||||
| N/A | `quiet` | Do not log "Profiling started/stopped" message. Used internally by asprof. |
|
||||
| N/A | `server=ADDRESS` | Start insecure HTTP server with the given IP address/port to control the profiler. This option can be specified as `-agentpath` argument only. Be careful not to expose async-profiler server in a public network. |
|
||||
| `--all-user` | `alluser` | Include only user-mode events. This option is helpful when kernel profiling is restricted by `perf_event_paranoid` settings. |
|
||||
| `--sched` | `sched` | Group threads by Linux-specific scheduling policy: BATCH/IDLE/OTHER. |
|
||||
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `lbr` (Last Branch Record, available on Haswell since Linux 4.1), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
|
||||
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
|
||||
| `--signal NUM` | `signal=NUM` | Use alternative signal for cpu or wall clock profiling. To change both signals, specify two numbers separated by a slash: `--signal SIGCPU/SIGWALL`. |
|
||||
| `--clock SOURCE` | `clock=SOURCE` | Clock source for JFR timestamps: `tsc` (default) or `monotonic` (equivalent to `CLOCK_MONOTONIC`). |
|
||||
| `--begin function` | `begin=FUNCTION` | Automatically start profiling when the specified native function is executed. |
|
||||
| `--end function` | `end=FUNCTION` | Automatically stop profiling when the specified native function is executed. |
|
||||
| `--ttsp` | `ttsp` | Time-to-safepoint profiling. An alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`.<br>It is not a separate event type, but rather a constraint. Whatever event type you choose (e.g. `cpu` or `wall`), the profiler will work as usual, except that only events between the safepoint request and the start of the VM operation will be recorded. |
|
||||
| `--nostop` | `nostop` | Record profiling window between `--begin` and `--end`, but do not stop profiling outside the window. |
|
||||
| `--libpath PATH` | `libpath=PATH` | Full path to `libasyncProfiler.so` (useful when profiling a container from the host). |
|
||||
| `--memlimit SIZE` | `memlimit=SIZE` | Limit memory used by the call trace storage. Once the limit is exceeded, no new stack traces will be recorded. The lowest possible limit is 10 MB; the default is unlimited.<br>Example: `asprof -e cpu --memlimit 128m` |
|
||||
| `--libpath PATH` | N/A | Full path to `libasyncProfiler.so` (useful when profiling a container from the host). |
|
||||
| `--filter FILTER` | `filter=FILTER` | In the wall-clock profiling mode, profile only threads with the specified ids.<br>Example: `asprof -e wall -d 30 --filter 120-127,132,134 Computey` |
|
||||
| `--fdtransfer` | `fdtransfer` | Run a background process that provides access to perf_events to an unprivileged process. `--fdtransfer` is useful for profiling a process in a container (which lacks access to perf_events) from the host.<br>See [Profiling Java in a container](ProfilingInContainer.md). |
|
||||
| `--target-cpu` | `target-cpu` | In perf_events profiling mode, instruct the profiler to only sample threads running on the specified CPU, defaults to -1.<br>Example: `asprof --target-cpu 3`. |
|
||||
@@ -65,13 +68,14 @@ The below options are `action`s for async-profiler and common for both `asprof`
|
||||
|
||||
## Options applicable to JFR output only
|
||||
|
||||
| asprof | Launch as agent | Description |
|
||||
| ------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `--chunksize N` | `chunksize=N` | Approximate size for a single JFR chunk. A new chunk will be started whenever specified size is reached. The default `chunksize` is 100MB.<br>Example: `asprof -f profile.jfr --chunksize 100m 8983` |
|
||||
| `--chunktime N` | `chunktime=N` | Approximate time limit for a single JFR chunk. A new chunk will be started whenever specified time limit is reached. The default `chunktime` is 1 hour.<br>Example: `asprof -f profile.jfr --chunktime 1h 8983` |
|
||||
| `--jfropts OPTIONS` | `jfropts=OPTIONS` | Comma separated list of JFR recording options. Currently, the only available option is `mem` supported on Linux 3.17+. `mem` enables accumulating events in memory instead of flushing synchronously to a file. |
|
||||
| `--jfrsync CONFIG` | `jfrsync[=CONFIG]` | Start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies `-o jfr`.<br>`CONFIG` is a predefined JFR profile or a JFR configuration file (.jfc) or a list of JFR events started with `+`.<br><br>Example: `asprof -e cpu --jfrsync profile -f combined.jfr 8983` |
|
||||
| `--all` | `all` | Shorthand for enabling `cpu`, `wall`, `alloc`, `live`, `nativemem` and `lock` profiling simultaneously. This can be combined with `--alloc 2m --lock 10ms` etc. to pass custom interval/threshold. It is also possible to combine it with `-e` argument to change the type of event being collected (default is `cpu`). This is not recommended for production, especially for continuous profiling. |
|
||||
| asprof | Launch as agent | Description |
|
||||
| ------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `--chunksize N` | `chunksize=N` | Approximate size for a single JFR chunk. A new chunk will be started whenever specified size is reached. The default `chunksize` is 100MB.<br>Example: `asprof -f profile.jfr --chunksize 100m 8983` |
|
||||
| `--chunktime N` | `chunktime=N` | Approximate time limit for a single JFR chunk. A new chunk will be started whenever specified time limit is reached. The default `chunktime` is 1 hour.<br>Example: `asprof -f profile.jfr --chunktime 1h 8983` |
|
||||
| `--jfropts OPTIONS` | `jfropts=OPTIONS` | Comma separated list of JFR recording options. Currently, the only available option is `mem` supported on Linux 3.17+. `mem` enables accumulating events in memory instead of flushing synchronously to a file. |
|
||||
| `--jfrsync CONFIG` | `jfrsync[=CONFIG]` | Start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies `-o jfr`.<br>`CONFIG` is a predefined JFR profile or a JFR configuration file (.jfc) or a list of JFR events started with `+`.<br>Example: `asprof -e cpu --jfrsync profile -f combined.jfr 8983` |
|
||||
| `--proc INTERVAL` | `proc=INTERVAL` | Collect statistics about other processes in the system. Default sampling interval is 30s. |
|
||||
| `--all` | `all` | Shorthand for enabling `cpu`, `wall`, `alloc`, `live`, `lock`, `nativelock`, `nativemem`, and `proc` profiling simultaneously. This can be combined with `--alloc 2m --lock 10ms` etc. to pass custom interval/threshold. It is also possible to combine it with `-e` argument to change the type of event being collected (default is `cpu`). This is not recommended for production, especially for continuous profiling. |
|
||||
|
||||
## Options applicable to FlameGraph and Tree view outputs only
|
||||
|
||||
@@ -88,15 +92,20 @@ By default, async-profiler merges stack traces starting from the outermost (e.g.
|
||||
|
||||
## Options applicable to any output format except JFR
|
||||
|
||||
| asprof | Launch as agent | Description |
|
||||
| -------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `-t --threads` | `threads` | Profile threads separately. Each stack trace will end with a frame that denotes a single thread.<br>Example: `asprof -t 8983` |
|
||||
| `-s --simple` | `simple` | Print simple class names instead of fully qualified names. |
|
||||
| `-n --norm` | `norm` | Normalize names of hidden classes / lambdas. |
|
||||
| `-g --sig` | `sig` | Print method signatures. |
|
||||
| `-l --lib` | `lib` | Prepend library names to symbols, e.g. ``libjvm.so`JVM_DefineClassWithSource``. |
|
||||
| `--total` | `total` | Count the total value of the collected metric instead of the number of samples, e.g. total allocation size. |
|
||||
| `-a --ann` | `ann` | Annotate JIT compiled methods with `_[j]`, inlined methods with `_[i]`, interpreted methods with `_[0]` and C1 compiled methods with `_[1]`. FlameGraph and Tree view will color frames depending on their type regardless of this option. |
|
||||
| asprof | Launch as agent | Description |
|
||||
| -------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `-t --threads` | `threads` | Profile threads separately. Each stack trace will end with a frame that denotes a single thread.<br>Example: `asprof -t 8983` |
|
||||
| `-s --simple` | `simple` | Print simple class names instead of fully qualified names. |
|
||||
| `-n --norm` | `norm` | Normalize names of hidden classes / lambdas. |
|
||||
| `-g --sig` | `sig` | Print method signatures. |
|
||||
| `-a --ann` | `ann` | Annotate JIT compiled methods with `_[j]`, inlined methods with `_[i]`, interpreted methods with `_[0]` and C1 compiled methods with `_[1]`. FlameGraph and Tree view will color frames depending on their type regardless of this option. |
|
||||
| `-l --lib` | `lib` | Prepend library names to symbols, e.g. ``libjvm.so`JVM_DefineClassWithSource``. |
|
||||
| `--dot` | `dot` | Dotted class names, e.g. `java.lang.String` instead of `java/lang/String`. |
|
||||
| `--samples` | `samples` | Count the number of samples. This is the default aggregation option. |
|
||||
| `--total` | `total` | Count the total value of the collected metric instead of the number of samples, e.g. total allocation size. |
|
||||
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
|
||||
| `-X PATTERN` | `exclude=PATTERN` | Filter stack traces by the given pattern(s). `-X` defines the name pattern that _must not_ occur in any of the stack traces in the output. `-X` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -X '*Unsafe.park*' 8983` |
|
||||
| N/A | `mcache[=AGE]` | Maximum age of the method name cache. Default is `0` (do not cache method names between profiling sessions). |
|
||||
|
||||
## Appendix
|
||||
|
||||
|
||||
@@ -23,18 +23,6 @@ due to being signal safe in async-profiler.
|
||||
|
||||
The feature can be enabled with the option `--cstack dwarf` (or its agent equivalent `cstack=dwarf`).
|
||||
|
||||
## LBR
|
||||
|
||||
Modern Intel CPUs can profile branch instructions, including `call`s and `ret`s, and store their source and destination
|
||||
addresses (Last Branch Records) in hardware registers. Starting from Haswell, CPU can match these addresses to form a
|
||||
branch stack. This branch stack will be effectively a call chain automatically collected by the hardware.
|
||||
|
||||
LBR stacks are not always complete or accurate, but they still appear much more helpful comparing to FP-based stack
|
||||
walking, when a native library is compiled with omitted frame pointers. It works only with hardware events like
|
||||
`-e cycles` (`instructions`, `cache-misses` etc.) and the maximum call chain depth is 32 (hardware limit).
|
||||
|
||||
The feature can be enabled with the option `--cstack lbr` (or its agent equivalent `cstack=lbr`).
|
||||
|
||||
## VM Structs
|
||||
|
||||
async-profiler can leverage JVM internal structures to replicate the logic of Java stack walking
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>tools.profiler</groupId>
|
||||
<artifactId>jfr-converter</artifactId>
|
||||
<version>4.2.1</version>
|
||||
<version>4.4</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>async-profiler</name>
|
||||
@@ -57,7 +57,7 @@
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>Main</mainClass>
|
||||
<mainClass>one.convert.Main</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
|
||||
7
pom.xml
7
pom.xml
@@ -3,7 +3,7 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>tools.profiler</groupId>
|
||||
<artifactId>async-profiler</artifactId>
|
||||
<version>4.2.1</version>
|
||||
<version>4.4</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>async-profiler</name>
|
||||
@@ -98,6 +98,11 @@
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifestFile>src/api/one/profiler/MANIFEST.MF</manifestFile>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
|
||||
@@ -177,7 +177,7 @@ public class AsyncProfiler implements AsyncProfilerMXBean {
|
||||
|
||||
/**
|
||||
* Execute an agent-compatible profiling command -
|
||||
* the comma-separated list of arguments described in arguments.cpp
|
||||
* the comma-separated list of arguments defined in arguments.cpp
|
||||
*
|
||||
* @param command Profiling command
|
||||
* @return The command result
|
||||
|
||||
29
src/arch.h
29
src/arch.h
@@ -27,27 +27,26 @@ typedef unsigned short u16;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned long long u64;
|
||||
|
||||
static inline u64 atomicInc(volatile u64& var, u64 increment = 1) {
|
||||
return __sync_fetch_and_add(&var, increment);
|
||||
template<typename T>
|
||||
static inline T atomicInc(T& var, T increment = 1) {
|
||||
return __atomic_fetch_add(&var, increment, __ATOMIC_ACQ_REL);
|
||||
}
|
||||
|
||||
static inline int atomicInc(volatile u32& var, int increment = 1) {
|
||||
return __sync_fetch_and_add(&var, increment);
|
||||
template<typename T>
|
||||
static inline T atomicDec(T& var, T decrement = 1) {
|
||||
return __atomic_fetch_sub(&var, decrement, __ATOMIC_ACQ_REL);
|
||||
}
|
||||
|
||||
static inline int atomicInc(volatile int& var, int increment = 1) {
|
||||
return __sync_fetch_and_add(&var, increment);
|
||||
}
|
||||
|
||||
static inline u64 loadAcquire(u64& var) {
|
||||
template<typename T>
|
||||
static inline T loadAcquire(T& var) {
|
||||
return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
|
||||
static inline void storeRelease(u64& var, u64 value) {
|
||||
return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
|
||||
template<typename T, typename U>
|
||||
static inline void storeRelease(T& var, U value) {
|
||||
__atomic_store_n(&var, static_cast<T>(value), __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
typedef unsigned char instruction_t;
|
||||
@@ -56,7 +55,6 @@ const int BREAKPOINT_OFFSET = 0;
|
||||
|
||||
const int SYSCALL_SIZE = 2;
|
||||
const int FRAME_PC_SLOT = 1;
|
||||
const int PROBE_SP_LIMIT = 4;
|
||||
const int PLT_HEADER_SIZE = 16;
|
||||
const int PLT_ENTRY_SIZE = 16;
|
||||
const int PERF_REG_PC = 8; // PERF_REG_X86_IP
|
||||
@@ -78,7 +76,6 @@ const int BREAKPOINT_OFFSET = 0;
|
||||
|
||||
const int SYSCALL_SIZE = sizeof(instruction_t);
|
||||
const int FRAME_PC_SLOT = 1;
|
||||
const int PROBE_SP_LIMIT = 0;
|
||||
const int PLT_HEADER_SIZE = 20;
|
||||
const int PLT_ENTRY_SIZE = 12;
|
||||
const int PERF_REG_PC = 15; // PERF_REG_ARM_PC
|
||||
@@ -99,7 +96,6 @@ const int BREAKPOINT_OFFSET = 0;
|
||||
|
||||
const int SYSCALL_SIZE = sizeof(instruction_t);
|
||||
const int FRAME_PC_SLOT = 1;
|
||||
const int PROBE_SP_LIMIT = 0;
|
||||
const int PLT_HEADER_SIZE = 32;
|
||||
const int PLT_ENTRY_SIZE = 16;
|
||||
const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC
|
||||
@@ -122,7 +118,6 @@ const int BREAKPOINT_OFFSET = 8;
|
||||
|
||||
const int SYSCALL_SIZE = sizeof(instruction_t);
|
||||
const int FRAME_PC_SLOT = 2;
|
||||
const int PROBE_SP_LIMIT = 0;
|
||||
const int PLT_HEADER_SIZE = 24;
|
||||
const int PLT_ENTRY_SIZE = 24;
|
||||
const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP
|
||||
@@ -147,7 +142,6 @@ const int BREAKPOINT_OFFSET = 0;
|
||||
|
||||
const int SYSCALL_SIZE = sizeof(instruction_t);
|
||||
const int FRAME_PC_SLOT = 1; // return address is at -1 from FP
|
||||
const int PROBE_SP_LIMIT = 0;
|
||||
const int PLT_HEADER_SIZE = 24; // Best guess from examining readelf
|
||||
const int PLT_ENTRY_SIZE = 24; // ...same...
|
||||
const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC
|
||||
@@ -168,7 +162,6 @@ const int BREAKPOINT_OFFSET = 0;
|
||||
|
||||
const int SYSCALL_SIZE = sizeof(instruction_t);
|
||||
const int FRAME_PC_SLOT = 1;
|
||||
const int PROBE_SP_LIMIT = 0;
|
||||
const int PLT_HEADER_SIZE = 32;
|
||||
const int PLT_ENTRY_SIZE = 16;
|
||||
const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC
|
||||
|
||||
@@ -37,84 +37,7 @@ const size_t EXTRA_BUF_SIZE = 512;
|
||||
|
||||
|
||||
// Parses agent arguments.
|
||||
// The format of the string is:
|
||||
// arg[,arg...]
|
||||
// where arg is one of the following options:
|
||||
// start - start profiling
|
||||
// resume - start or resume profiling without resetting collected data
|
||||
// stop - stop profiling
|
||||
// dump - dump collected data without stopping profiling session
|
||||
// status - print profiling status (inactive / running for X seconds)
|
||||
// metrics - print profiler metrics in Prometheus format
|
||||
// list - show the list of available profiling events
|
||||
// version - display the agent version
|
||||
// event=EVENT - which event to trace (cpu, wall, cache-misses, etc.)
|
||||
// alloc[=BYTES] - profile allocations with BYTES interval
|
||||
// live - build allocation profile from live objects only
|
||||
// nativemem[=BYTES] - profile native allocations with BYTES interval
|
||||
// nofree - do not collect free calls in native allocation profiling
|
||||
// trace=METHOD[:DURATION] - method to be traced with optional latency threshold
|
||||
// lock[=DURATION] - profile contended locks overflowing the DURATION bucket (default: 10us)
|
||||
// nativelock[=DURATION] - profile contended pthread locks overflowing the DURATION bucket (default: 10us)
|
||||
// wall[=NS] - run wall clock profiling together with CPU profiling
|
||||
// nobatch - legacy wall clock sampling without batch events
|
||||
// proc[=S] - collect process stats (default: 30s)
|
||||
// collapsed - dump collapsed stacks (the format used by FlameGraph script)
|
||||
// flamegraph - produce Flame Graph in HTML format
|
||||
// tree - produce call tree in HTML format
|
||||
// jfr - dump events in Java Flight Recorder format
|
||||
// jfropts=OPTIONS - JFR recording options: numeric bitmask or 'mem'
|
||||
// jfrsync[=CONFIG] - start Java Flight Recording with the given config along with the profiler
|
||||
// traces[=N] - dump top N call traces
|
||||
// flat[=N] - dump top N methods (aka flat profile)
|
||||
// otlp - dump in OpenTelemetry format
|
||||
// samples - count the number of samples (default)
|
||||
// total - count the total value (time, bytes, etc.) instead of samples
|
||||
// chunksize=N - approximate size of JFR chunk in bytes (default: 100 MB)
|
||||
// chunktime=N - duration of JFR chunk in seconds (default: 1 hour)
|
||||
// timeout=TIME - automatically stop profiler at TIME (absolute or relative)
|
||||
// loop=TIME - run profiler in a loop (continuous profiling)
|
||||
// interval=N - sampling interval in ns (default: 10'000'000, i.e. 10 ms)
|
||||
// jstackdepth=N - maximum Java stack depth (default: 2048)
|
||||
// signal=N - use alternative signal for cpu or wall clock profiling
|
||||
// features=LIST - advanced stack trace features (mixed, vtable, comptask, pcaddr)
|
||||
// safemode=BITS - disable stack recovery techniques (default: 0, i.e. everything enabled)
|
||||
// file=FILENAME - output file name for dumping
|
||||
// log=FILENAME - log warnings and errors to the given dedicated stream
|
||||
// loglevel=LEVEL - logging level: TRACE, DEBUG, INFO, WARN, ERROR, or NONE
|
||||
// quiet - do not log "Profiling started/stopped" message
|
||||
// server=ADDRESS - start insecure HTTP server at ADDRESS/PORT
|
||||
// filter=FILTER - thread filter
|
||||
// threads - profile different threads separately
|
||||
// sched - group threads by scheduling policy
|
||||
// cstack=MODE - how to collect C stack frames in addition to Java stack
|
||||
// MODE is 'fp', 'dwarf', 'lbr', 'vm' or 'no'
|
||||
// clock=SOURCE - clock source for JFR timestamps: 'tsc' or 'monotonic'
|
||||
// alluser - include only user-mode events
|
||||
// fdtransfer - use fdtransfer to pass fds to the profiler
|
||||
// target-cpu=CPU - sample threads on a specific CPU (perf_events only, default: -1)
|
||||
// record-cpu - record which cpu a sample was taken on
|
||||
// simple - simple class names instead of FQN
|
||||
// dot - dotted class names
|
||||
// norm - normalize names of hidden classes / lambdas
|
||||
// sig - print method signatures
|
||||
// ann - annotate Java methods
|
||||
// lib - prepend library names
|
||||
// mcache - max age of jmethodID cache (default: 0 = disabled)
|
||||
// include=PATTERN - include stack traces containing PATTERN
|
||||
// exclude=PATTERN - exclude stack traces containing PATTERN
|
||||
// begin=FUNCTION - begin profiling when FUNCTION is executed
|
||||
// end=FUNCTION - end profiling when FUNCTION is executed
|
||||
// nostop - do not stop profiling outside --begin/--end window
|
||||
// ttsp - only time-to-safepoint profiling
|
||||
// title=TITLE - FlameGraph title
|
||||
// minwidth=PCT - FlameGraph minimum frame width in percent
|
||||
// reverse - generate stack-reversed FlameGraph / Call tree (defaults to icicle graph)
|
||||
// inverted - toggles the layout for reversed stacktraces from icicle to flamegraph
|
||||
// and for default stacktraces from flamegraph to icicle
|
||||
//
|
||||
// It is possible to specify multiple dump options at the same time
|
||||
|
||||
// The format of the string is: arg[,arg...]
|
||||
Error Arguments::parse(const char* args) {
|
||||
if (args == NULL) {
|
||||
return Error::OK;
|
||||
@@ -148,9 +71,6 @@ Error Arguments::parse(const char* args) {
|
||||
CASE("dump")
|
||||
_action = ACTION_DUMP;
|
||||
|
||||
CASE("check")
|
||||
_action = ACTION_CHECK;
|
||||
|
||||
CASE("status")
|
||||
_action = ACTION_STATUS;
|
||||
|
||||
@@ -246,9 +166,15 @@ Error Arguments::parse(const char* args) {
|
||||
msg = "Invalid loop duration";
|
||||
}
|
||||
|
||||
CASE("memlimit")
|
||||
_mem_limit = value == NULL ? 0 : parseUnits(value, BYTES);
|
||||
|
||||
CASE("alloc")
|
||||
_alloc = value == NULL ? 0 : parseUnits(value, BYTES);
|
||||
|
||||
CASE("tlab")
|
||||
_tlab = true;
|
||||
|
||||
CASE("nativemem")
|
||||
_nativemem = value == NULL ? 0 : parseUnits(value, BYTES);
|
||||
|
||||
@@ -312,6 +238,9 @@ Error Arguments::parse(const char* args) {
|
||||
CASE("jstackdepth")
|
||||
if (value == NULL || (_jstackdepth = atoi(value)) <= 0) {
|
||||
msg = "jstackdepth must be > 0";
|
||||
} else {
|
||||
char* slash = strchr(value, '/');
|
||||
_truncated_stack_depth = slash != NULL ? atoi(slash + 1) : _jstackdepth;
|
||||
}
|
||||
|
||||
CASE("signal")
|
||||
@@ -326,24 +255,13 @@ Error Arguments::parse(const char* args) {
|
||||
if (value != NULL) {
|
||||
if (strstr(value, "stats")) _features.stats = 1;
|
||||
if (strstr(value, "jnienv")) _features.jnienv = 1;
|
||||
if (strstr(value, "probesp")) _features.probe_sp = 1;
|
||||
if (strstr(value, "agct")) _features.agct = 1;
|
||||
if (strstr(value, "mixed")) _features.mixed = 1;
|
||||
if (strstr(value, "vtable")) _features.vtable_target = 1;
|
||||
if (strstr(value, "comptask")) _features.comp_task = 1;
|
||||
if (strstr(value, "pcaddr")) _features.pc_addr = 1;
|
||||
}
|
||||
|
||||
CASE("safemode") {
|
||||
// Left for compatibility purpose; will be eventually migrated to 'features'
|
||||
int bits = value == NULL ? INT_MAX : (int)strtol(value, NULL, 0);
|
||||
_features.unknown_java = (bits & 1) ? 0 : 1;
|
||||
_features.unwind_stub = (bits & 2) ? 0 : 1;
|
||||
_features.unwind_comp = (bits & 4) ? 0 : 1;
|
||||
_features.unwind_native = (bits & 8) ? 0 : 1;
|
||||
_features.java_anchor = (bits & 16) ? 0 : 1;
|
||||
_features.gc_traces = (bits & 32) ? 0 : 1;
|
||||
}
|
||||
|
||||
CASE("file")
|
||||
if (value == NULL || value[0] == 0) {
|
||||
msg = "file must not be empty";
|
||||
@@ -409,8 +327,6 @@ Error Arguments::parse(const char* args) {
|
||||
_cstack = CSTACK_FP;
|
||||
} else if (strcmp(value, "dwarf") == 0) {
|
||||
_cstack = CSTACK_DWARF;
|
||||
} else if (strcmp(value, "lbr") == 0) {
|
||||
_cstack = CSTACK_LBR;
|
||||
} else if (strcmp(value, "vm") == 0) {
|
||||
_cstack = CSTACK_VM;
|
||||
} else if (strcmp(value, "vmx") == 0) {
|
||||
|
||||
@@ -33,7 +33,6 @@ enum SHORT_ENUM Action {
|
||||
ACTION_RESUME,
|
||||
ACTION_STOP,
|
||||
ACTION_DUMP,
|
||||
ACTION_CHECK,
|
||||
ACTION_STATUS,
|
||||
ACTION_METRICS,
|
||||
ACTION_LIST,
|
||||
@@ -61,7 +60,6 @@ enum SHORT_ENUM CStack {
|
||||
CSTACK_NO, // do not collect native frames
|
||||
CSTACK_FP, // walk stack using Frame Pointer links
|
||||
CSTACK_DWARF, // use DWARF unwinding info from .eh_frame section
|
||||
CSTACK_LBR, // Last Branch Record hardware capability
|
||||
CSTACK_VM // unwind using HotSpot VMStructs
|
||||
};
|
||||
|
||||
@@ -107,25 +105,14 @@ enum EventMask {
|
||||
constexpr int EVENT_MASK_SIZE = 7;
|
||||
|
||||
struct StackWalkFeatures {
|
||||
// Deprecated stack recovery techniques used to workaround AsyncGetCallTrace flaws
|
||||
unsigned short unknown_java : 1;
|
||||
unsigned short unwind_stub : 1;
|
||||
unsigned short unwind_comp : 1;
|
||||
unsigned short unwind_native : 1;
|
||||
unsigned short java_anchor : 1;
|
||||
unsigned short gc_traces : 1;
|
||||
|
||||
// Common features
|
||||
unsigned short stats : 1; // collect stack walking duration statistics
|
||||
|
||||
// Additional HotSpot-specific features
|
||||
unsigned short jnienv : 1; // verify JNIEnv* obtained using VMStructs
|
||||
unsigned short probe_sp : 1; // when AsyncGetCallTrace fails, adjust SP and retry
|
||||
unsigned short agct : 1; // force usage of AsyncGetCallTrace instead of VMStructs
|
||||
unsigned short mixed : 1; // mixed stack traces with Java and native frames interleaved
|
||||
unsigned short vtable_target : 1; // show receiver classes of vtable/itable stubs
|
||||
unsigned short comp_task : 1; // display current compilation task for JIT threads
|
||||
unsigned short pc_addr : 1; // record exact PC address for each sample
|
||||
unsigned short _padding : 3; // pad structure to 16 bits
|
||||
unsigned short _padding : 9; // pad structure to 16 bits
|
||||
};
|
||||
|
||||
|
||||
@@ -177,6 +164,7 @@ class Arguments {
|
||||
std::vector<const char*> _trace;
|
||||
int _timeout;
|
||||
int _loop;
|
||||
size_t _mem_limit;
|
||||
long _interval;
|
||||
long _alloc;
|
||||
long _nativemem;
|
||||
@@ -186,6 +174,7 @@ class Arguments {
|
||||
long _proc;
|
||||
bool _all;
|
||||
int _jstackdepth;
|
||||
int _truncated_stack_depth;
|
||||
int _signal;
|
||||
const char* _file;
|
||||
const char* _log;
|
||||
@@ -201,6 +190,7 @@ class Arguments {
|
||||
bool _threads;
|
||||
bool _sched;
|
||||
bool _record_cpu;
|
||||
bool _tlab;
|
||||
bool _live;
|
||||
bool _nofree;
|
||||
bool _nobatch;
|
||||
@@ -238,6 +228,7 @@ class Arguments {
|
||||
_trace(),
|
||||
_timeout(0),
|
||||
_loop(0),
|
||||
_mem_limit(0),
|
||||
_interval(0),
|
||||
_alloc(-1),
|
||||
_nativemem(-1),
|
||||
@@ -247,6 +238,7 @@ class Arguments {
|
||||
_proc(-1),
|
||||
_all(false),
|
||||
_jstackdepth(DEFAULT_JSTACKDEPTH),
|
||||
_truncated_stack_depth(DEFAULT_JSTACKDEPTH),
|
||||
_signal(0),
|
||||
_file(NULL),
|
||||
_log(NULL),
|
||||
@@ -262,6 +254,7 @@ class Arguments {
|
||||
_threads(false),
|
||||
_sched(false),
|
||||
_record_cpu(false),
|
||||
_tlab(false),
|
||||
_live(false),
|
||||
_nofree(false),
|
||||
_nobatch(false),
|
||||
@@ -271,7 +264,7 @@ class Arguments {
|
||||
_fdtransfer_path(NULL),
|
||||
_target_cpu(-1),
|
||||
_style(0),
|
||||
_features{1, 1, 1, 1, 1, 1},
|
||||
_features{},
|
||||
_cstack(CSTACK_DEFAULT),
|
||||
_clock(CLK_DEFAULT),
|
||||
_output(OUTPUT_NONE),
|
||||
@@ -302,7 +295,7 @@ class Arguments {
|
||||
|
||||
bool hasOutputFile() const {
|
||||
return _file != NULL &&
|
||||
(_action == ACTION_STOP || _action == ACTION_DUMP ? _output != OUTPUT_JFR : _action >= ACTION_CHECK);
|
||||
(_action == ACTION_STOP || _action == ACTION_DUMP ? _output != OUTPUT_JFR : _action >= ACTION_STATUS);
|
||||
}
|
||||
|
||||
bool hasOption(JfrOption option) const {
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "callTraceStorage.h"
|
||||
#include "os.h"
|
||||
@@ -12,6 +13,7 @@
|
||||
static const u32 INITIAL_CAPACITY = 65536;
|
||||
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
|
||||
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
|
||||
static const size_t MEM_LIMIT_EXTRA = 0x10000; // reserve up to 64 KB for LongHashTable headers
|
||||
|
||||
|
||||
class LongHashTable {
|
||||
@@ -83,6 +85,8 @@ CallTrace CallTraceStorage::_overflow_trace = {1, {BCI_ERROR, LP64_ONLY(0 COMMA)
|
||||
|
||||
// Creates a storage with a single initial hash table and no memory limit.
CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK) {
    LongHashTable* initial_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);
    _current_table = initial_table;
    // Table memory is tracked incrementally; start with the footprint of the first table
    _used_memory = initial_table->usedMemory();
    // SIZE_MAX means "unlimited"
    _mem_limit = SIZE_MAX;
    _overflow = 0;
}
|
||||
|
||||
@@ -92,12 +96,14 @@ CallTraceStorage::~CallTraceStorage() {
|
||||
}
|
||||
}
|
||||
|
||||
void CallTraceStorage::clear() {
|
||||
void CallTraceStorage::clear(size_t mem_limit) {
|
||||
while (_current_table->prev() != NULL) {
|
||||
_current_table = _current_table->destroy();
|
||||
}
|
||||
_current_table->clear();
|
||||
_used_memory = _current_table->usedMemory();
|
||||
_allocator.clear();
|
||||
_mem_limit = mem_limit ? mem_limit | MEM_LIMIT_EXTRA : SIZE_MAX;
|
||||
_overflow = 0;
|
||||
}
|
||||
|
||||
@@ -108,11 +114,7 @@ u32 CallTraceStorage::capacity() {
|
||||
}
|
||||
|
||||
size_t CallTraceStorage::usedMemory() {
|
||||
size_t bytes = _allocator.usedMemory();
|
||||
for (LongHashTable* table = _current_table; table != NULL; table = table->prev()) {
|
||||
bytes += table->usedMemory();
|
||||
}
|
||||
return bytes;
|
||||
return _used_memory + _allocator.usedMemory();
|
||||
}
|
||||
|
||||
void CallTraceStorage::collectTraces(std::map<u32, CallTrace*>& map) {
|
||||
@@ -241,15 +243,23 @@ u32 CallTraceStorage::put(int num_frames, ASGCT_CallFrame* frames, u64 counter)
|
||||
|
||||
while (keys[slot] != hash) {
|
||||
if (keys[slot] == 0) {
|
||||
if (usedMemory() > _mem_limit) {
|
||||
// Stop adding new stack traces once memory limit is exceeded
|
||||
atomicInc(_overflow);
|
||||
return OVERFLOW_TRACE_ID;
|
||||
}
|
||||
|
||||
if (!__sync_bool_compare_and_swap(&keys[slot], 0, hash)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table
|
||||
// Increment the table size, and if the load factor exceeds 0.75, reserve a new table.
|
||||
// This condition can be hit only once per table, so the below allocation is race-free.
|
||||
if (table->incSize() == capacity * 3 / 4) {
|
||||
LongHashTable* new_table = LongHashTable::allocate(table, capacity * 2);
|
||||
if (new_table != NULL) {
|
||||
__sync_bool_compare_and_swap(&_current_table, table, new_table);
|
||||
atomicInc(_used_memory, new_table->usedMemory());
|
||||
storeRelease(_current_table, new_table);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,11 +26,11 @@ struct CallTraceSample {
|
||||
u64 counter;
|
||||
|
||||
CallTrace* acquireTrace() {
|
||||
return __atomic_load_n(&trace, __ATOMIC_ACQUIRE);
|
||||
return loadAcquire(trace);
|
||||
}
|
||||
|
||||
void setTrace(CallTrace* value) {
|
||||
return __atomic_store_n(&trace, value, __ATOMIC_RELEASE);
|
||||
storeRelease(trace, value);
|
||||
}
|
||||
|
||||
CallTraceSample& operator+=(const CallTraceSample& s) {
|
||||
@@ -47,6 +47,8 @@ class CallTraceStorage {
|
||||
|
||||
LinearAllocator _allocator;
|
||||
LongHashTable* _current_table;
|
||||
size_t _used_memory;
|
||||
size_t _mem_limit;
|
||||
u64 _overflow;
|
||||
|
||||
u64 calcHash(int num_frames, ASGCT_CallFrame* frames);
|
||||
@@ -57,7 +59,7 @@ class CallTraceStorage {
|
||||
CallTraceStorage();
|
||||
~CallTraceStorage();
|
||||
|
||||
void clear();
|
||||
void clear(size_t mem_limit);
|
||||
u32 capacity();
|
||||
size_t usedMemory();
|
||||
u64 overflow() { return _overflow; }
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#define _CODECACHE_H
|
||||
|
||||
#include <jvmti.h>
|
||||
#include "arch.h"
|
||||
|
||||
|
||||
#define NO_MIN_ADDRESS ((const void*)-1)
|
||||
@@ -230,7 +231,7 @@ class CodeCacheArray {
|
||||
}
|
||||
|
||||
int count() {
|
||||
return __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
|
||||
return loadAcquire(_count);
|
||||
}
|
||||
|
||||
size_t usedMemory() {
|
||||
@@ -238,10 +239,10 @@ class CodeCacheArray {
|
||||
}
|
||||
|
||||
void add(CodeCache* lib) {
|
||||
int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
|
||||
int index = loadAcquire(_count);
|
||||
_libs[index] = lib;
|
||||
_used_memory += lib->usedMemory();
|
||||
__atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE);
|
||||
storeRelease(_count, index + 1);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ public class Arguments {
|
||||
public boolean help;
|
||||
public boolean reverse;
|
||||
public boolean inverted;
|
||||
public boolean diff;
|
||||
public boolean cpu;
|
||||
public boolean cpuTime;
|
||||
public boolean wall;
|
||||
|
||||
@@ -20,6 +20,7 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
private static final String[] FRAME_SUFFIX = {"_[0]", "_[j]", "_[i]", "", "", "_[k]", "_[1]"};
|
||||
private static final byte HAS_SUFFIX = (byte) 0x80;
|
||||
private static final int FLUSH_THRESHOLD = 15000;
|
||||
private static final long NEW_FRAME_DIFF = Long.MIN_VALUE;
|
||||
private static final Pattern TID_FRAME_PATTERN = Pattern.compile("\\[(.* )?tid=\\d+]");
|
||||
|
||||
private final Arguments args;
|
||||
@@ -29,11 +30,14 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
|
||||
private String title = "Flame Graph";
|
||||
private int[] order;
|
||||
private int[] cpoolMap;
|
||||
private int depth;
|
||||
private int lastLevel;
|
||||
private long lastX;
|
||||
private long lastTotal;
|
||||
private long lastDiff;
|
||||
private long mintotal;
|
||||
private long maxdiff = -1;
|
||||
|
||||
public FlameGraph(Arguments args) {
|
||||
this.args = args;
|
||||
@@ -90,6 +94,8 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
while (!br.readLine().isEmpty()) ;
|
||||
|
||||
for (String line; !(line = br.readLine()).isEmpty(); ) {
|
||||
if (line.startsWith("d=")) continue; // artifact of a differential flame graph
|
||||
|
||||
StringTokenizer st = new StringTokenizer(line.substring(2, line.length() - 1), ",");
|
||||
int nameAndType = Integer.parseInt(st.nextToken());
|
||||
|
||||
@@ -109,12 +115,10 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
|
||||
int titleIndex = nameAndType >>> 3;
|
||||
byte type = (byte) (nameAndType & 7);
|
||||
if (st.hasMoreTokens() && (type <= TYPE_INLINED || type >= TYPE_C1_COMPILED)) {
|
||||
type = TYPE_JIT_COMPILED;
|
||||
}
|
||||
byte normalizedType = type <= TYPE_INLINED || type >= TYPE_C1_COMPILED ? TYPE_JIT_COMPILED : type;
|
||||
|
||||
Frame f = level > 0 || needRebuild ? new Frame(titleIndex, type) : root;
|
||||
f.self = f.total = total;
|
||||
Frame f = level > 0 || needRebuild ? new Frame(titleIndex, normalizedType) : root;
|
||||
fillFrameCounters(f, type, total);
|
||||
if (st.hasMoreTokens()) f.inlined = Long.parseLong(st.nextToken());
|
||||
if (st.hasMoreTokens()) f.c1 = Long.parseLong(st.nextToken());
|
||||
if (st.hasMoreTokens()) f.interpreted = Long.parseLong(st.nextToken());
|
||||
@@ -177,6 +181,26 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
depth = Math.max(depth, stack.size);
|
||||
}
|
||||
|
||||
public void diff(FlameGraph base) {
|
||||
// Build a map that translates this cpool keys to the base flamegraph's cpool keys
|
||||
cpoolMap = Arrays.stream(cpool.keys()).mapToInt(title -> base.cpool.getOrDefault(title, -1)).toArray();
|
||||
diff(base.root, root);
|
||||
}
|
||||
|
||||
private void diff(Frame base, Frame current) {
|
||||
current.diff = base == null ? NEW_FRAME_DIFF : current.self - base.self;
|
||||
maxdiff = Math.max(maxdiff, Math.abs(current.diff));
|
||||
|
||||
for (Frame child : current.values()) {
|
||||
Frame baseChild = base == null ? null : base.get(translateKey(child.key));
|
||||
diff(baseChild, child);
|
||||
}
|
||||
}
|
||||
|
||||
private int translateKey(int key) {
|
||||
return cpoolMap[key & TITLE_MASK] | (key & ~TITLE_MASK);
|
||||
}
|
||||
|
||||
public void dump(OutputStream out) throws IOException {
|
||||
try (PrintStream ps = new PrintStream(out, false, "UTF-8")) {
|
||||
dump(ps);
|
||||
@@ -205,6 +229,9 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
tail = printTill(out, tail, "/*inverted:*/false");
|
||||
out.print(args.reverse ^ args.inverted);
|
||||
|
||||
tail = printTill(out, tail, "/*maxdiff:*/-1");
|
||||
out.print(maxdiff);
|
||||
|
||||
tail = printTill(out, tail, "/*depth:*/0");
|
||||
out.print(depth);
|
||||
|
||||
@@ -239,6 +266,15 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
}
|
||||
|
||||
private void printFrame(PrintStream out, Frame frame, int level, long x) {
|
||||
StringBuilder sb = outbuf;
|
||||
if (frame.diff != lastDiff) {
|
||||
if (frame.diff == NEW_FRAME_DIFF) {
|
||||
sb.append("d=U\n");
|
||||
} else {
|
||||
sb.append("d=").append(frame.diff).append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
int nameAndType = order[frame.getTitleIndex()] << 3 | frame.getType();
|
||||
boolean hasExtraTypes = (frame.inlined | frame.c1 | frame.interpreted) != 0 &&
|
||||
frame.inlined < frame.total && frame.interpreted < frame.total;
|
||||
@@ -250,7 +286,7 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
func = 'n';
|
||||
}
|
||||
|
||||
StringBuilder sb = outbuf.append(func).append('(').append(nameAndType);
|
||||
sb.append(func).append('(').append(nameAndType);
|
||||
if (func == 'f') {
|
||||
sb.append(',').append(level).append(',').append(x - lastX);
|
||||
}
|
||||
@@ -270,6 +306,7 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
lastLevel = level;
|
||||
lastX = x;
|
||||
lastTotal = frame.total;
|
||||
lastDiff = frame.diff;
|
||||
|
||||
Frame[] children = frame.values().toArray(EMPTY_FRAME_ARRAY);
|
||||
Arrays.sort(children, this);
|
||||
@@ -291,6 +328,9 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
sb.append(strings[frame.getTitleIndex()]).append(FRAME_SUFFIX[frame.getType()]);
|
||||
if (frame.self > 0) {
|
||||
int tmpLength = sb.length();
|
||||
if (maxdiff >= 0) {
|
||||
sb.append(' ').append(frame.diff == NEW_FRAME_DIFF ? 0 : frame.self - frame.diff);
|
||||
}
|
||||
out.print(sb.append(' ').append(frame.self).append('\n'));
|
||||
sb.setLength(tmpLength);
|
||||
}
|
||||
@@ -328,6 +368,21 @@ public class FlameGraph implements Comparator<Frame> {
|
||||
return include != null;
|
||||
}
|
||||
|
||||
private static void fillFrameCounters(Frame frame, byte type, long ticks) {
|
||||
frame.self = frame.total = ticks;
|
||||
switch (type) {
|
||||
case TYPE_INTERPRETED:
|
||||
frame.interpreted = ticks;
|
||||
break;
|
||||
case TYPE_INLINED:
|
||||
frame.inlined = ticks;
|
||||
break;
|
||||
case TYPE_C1_COMPILED:
|
||||
frame.c1 = ticks;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private Frame addChild(Frame frame, String title, byte type, long ticks) {
|
||||
frame.total += ticks;
|
||||
|
||||
|
||||
@@ -16,11 +16,13 @@ public class Frame extends HashMap<Integer, Frame> {
|
||||
public static final byte TYPE_KERNEL = 5;
|
||||
public static final byte TYPE_C1_COMPILED = 6;
|
||||
|
||||
private static final int TYPE_SHIFT = 28;
|
||||
static final int TYPE_SHIFT = 28;
|
||||
static final int TITLE_MASK = (1 << TYPE_SHIFT) - 1;
|
||||
|
||||
final int key;
|
||||
long total;
|
||||
long self;
|
||||
long diff;
|
||||
long inlined, c1, interpreted;
|
||||
|
||||
private Frame(int key) {
|
||||
@@ -36,7 +38,7 @@ public class Frame extends HashMap<Integer, Frame> {
|
||||
}
|
||||
|
||||
int getTitleIndex() {
|
||||
return key & ((1 << TYPE_SHIFT) - 1);
|
||||
return key & TITLE_MASK;
|
||||
}
|
||||
|
||||
byte getType() {
|
||||
|
||||
@@ -30,6 +30,17 @@ public class JfrToHeatmap extends JfrConverter {
|
||||
@Override
|
||||
protected EventCollector createCollector(Arguments args) {
|
||||
return new EventCollector() {
|
||||
long wallInterval;
|
||||
|
||||
private long getWallInterval() {
|
||||
if (wallInterval == 0) {
|
||||
String wall = jfr.settings.get("wall");
|
||||
long interval = Long.parseLong(wall != null ? wall : jfr.settings.get("interval"));
|
||||
wallInterval = interval != 0 ? interval : 50_000_000;
|
||||
}
|
||||
return wallInterval;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(Event event) {
|
||||
int classId = 0;
|
||||
@@ -42,10 +53,14 @@ public class JfrToHeatmap extends JfrConverter {
|
||||
type = TYPE_KERNEL;
|
||||
}
|
||||
|
||||
long msFromStart = (event.time - jfr.chunkStartTicks) * 1_000 / jfr.ticksPerSec;
|
||||
long timeMs = jfr.chunkStartNanos / 1_000_000 + msFromStart;
|
||||
|
||||
heatmap.addEvent(event.stackTraceId, event.tid, classId, type, timeMs);
|
||||
long timeNs = jfr.eventTimeToNanos(event.time);
|
||||
long samples = event.samples();
|
||||
while (true) {
|
||||
heatmap.addEvent(event.stackTraceId, event.tid, classId, type, timeNs / 1_000_000);
|
||||
if (--samples <= 0) break;
|
||||
// Only wall clock events can have samples > 1
|
||||
timeNs += getWallInterval();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -62,6 +77,7 @@ public class JfrToHeatmap extends JfrConverter {
|
||||
@Override
|
||||
public void afterChunk() {
|
||||
jfr.stackTraces.clear();
|
||||
wallInterval = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -7,10 +7,10 @@ package one.convert;
|
||||
|
||||
import static one.convert.OtlpConstants.*;
|
||||
|
||||
import one.jfr.Dictionary;
|
||||
import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.Event;
|
||||
import one.jfr.event.EventCollector;
|
||||
import one.jfr.event.*;
|
||||
import one.proto.Proto;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
@@ -33,6 +33,11 @@ public class JfrToOtlp extends JfrConverter {
|
||||
private final Index<IntArray> stacksPool = new Index<>(IntArray.class, IntArray.EMPTY);
|
||||
private final int threadNameIndex = stringPool.index(OTLP_THREAD_NAME);
|
||||
|
||||
private final Dictionary<AggregatedEvent> aggregatedEvents = new Dictionary<>();
|
||||
// Chunk-private cache to remember mappings from stacktrace ID to OTLP stack index
|
||||
private final Map<Integer, Integer> stacksIndexCache = new HashMap<>();
|
||||
private double chunkCounterFactor;
|
||||
|
||||
private final Proto proto = new Proto(1024);
|
||||
|
||||
public JfrToOtlp(JfrReader jfr, Arguments args) {
|
||||
@@ -43,6 +48,50 @@ public class JfrToOtlp extends JfrConverter {
|
||||
out.write(proto.buffer(), 0, proto.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EventCollector createCollector(Arguments args) {
|
||||
return new EventCollector() {
|
||||
public void beforeChunk() {
|
||||
chunkCounterFactor = counterFactor();
|
||||
aggregatedEvents.clear();
|
||||
stacksIndexCache.clear();
|
||||
}
|
||||
|
||||
public void collect(Event e) {
|
||||
if (excludeStack(e.stackTraceId, e.tid, 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
long key = ((long) e.tid) << 32 | e.stackTraceId;
|
||||
AggregatedEvent ec = aggregatedEvents.get(key);
|
||||
if (ec == null) {
|
||||
ec = new AggregatedEvent();
|
||||
aggregatedEvents.put(key, ec);
|
||||
}
|
||||
|
||||
long recordedValue = !args.total ? e.samples() : chunkCounterFactor == 1.0 ? e.value() : (long) (e.value() * chunkCounterFactor);
|
||||
ec.recordEvent(getUnixTimestampNanos(e.time), recordedValue);
|
||||
}
|
||||
|
||||
private long getUnixTimestampNanos(long jfrTimestamp) {
|
||||
long nanosFromStart = (long) ((jfrTimestamp - jfr.chunkStartTicks) * jfr.nanosPerTick);
|
||||
return jfr.chunkStartNanos + nanosFromStart;
|
||||
}
|
||||
|
||||
public void afterChunk() {}
|
||||
|
||||
public boolean finish() {
|
||||
aggregatedEvents.clear();
|
||||
stacksIndexCache.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
public void forEach(Visitor visitor) {
|
||||
throw new UnsupportedOperationException("Not supported");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void convert() throws IOException {
|
||||
long rpMark = proto.startField(PROFILES_DATA_resource_profiles, MSG_LARGE);
|
||||
@@ -56,9 +105,6 @@ public class JfrToOtlp extends JfrConverter {
|
||||
|
||||
@Override
|
||||
protected void convertChunk() {
|
||||
List<SampleInfo> samplesInfo = new ArrayList<>();
|
||||
collector.forEach(new OtlpEventToSampleVisitor(samplesInfo));
|
||||
|
||||
long pMark = proto.startField(SCOPE_PROFILES_profiles, MSG_LARGE);
|
||||
|
||||
long sttMark = proto.startField(PROFILE_sample_type, MSG_SMALL);
|
||||
@@ -70,20 +116,62 @@ public class JfrToOtlp extends JfrConverter {
|
||||
proto.fieldFixed64(PROFILE_time_unix_nano, jfr.chunkStartNanos);
|
||||
proto.field(PROFILE_duration_nanos, jfr.chunkDurationNanos());
|
||||
|
||||
writeSamples(samplesInfo, !args.total /* samples */);
|
||||
aggregatedEvents.forEach((key, value) -> {
|
||||
int stackTraceId = (int) key;
|
||||
int tid = (int) (key >> 32);
|
||||
writeSample(stackTraceId, tid, value);
|
||||
});
|
||||
|
||||
proto.commitField(pMark);
|
||||
}
|
||||
|
||||
private void writeSamples(List<SampleInfo> samplesInfo, boolean samples) {
|
||||
for (SampleInfo si : samplesInfo) {
|
||||
long sMark = proto.startField(PROFILE_samples, MSG_SMALL);
|
||||
proto.field(SAMPLE_stack_index, si.stackIndex);
|
||||
proto.field(SAMPLE_values, samples ? si.samples : si.value);
|
||||
proto.field(SAMPLE_attribute_indices, si.threadNameAttributeIndex);
|
||||
proto.fieldFixed64(SAMPLE_timestamps_unix_nano, si.timeNanos);
|
||||
proto.commitField(sMark);
|
||||
private IntArray makeStack(int stackTraceId) {
|
||||
StackTrace st = jfr.stackTraces.get(stackTraceId);
|
||||
int[] stack = new int[st.methods.length];
|
||||
for (int i = 0; i < st.methods.length; ++i) {
|
||||
stack[i] = linePool.index(makeLine(st, i));
|
||||
}
|
||||
return new IntArray(stack);
|
||||
}
|
||||
|
||||
private Line makeLine(StackTrace stackTrace, int i) {
|
||||
String methodName = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
|
||||
int lineNumber = stackTrace.locations[i] >>> 16;
|
||||
int functionIdx = functionPool.index(methodName);
|
||||
return new Line(functionIdx, lineNumber);
|
||||
}
|
||||
|
||||
private void writeSample(int stackTraceId, int tid, AggregatedEvent ae) {
|
||||
// 24 is the sum of:
|
||||
// 4 tags: 1 byte
|
||||
// 5 * 2: max size of thread name and stack idx
|
||||
// 5 * 2: max size of timestamps/values arrays
|
||||
int maxLengthBytes = varintSize(24 + ae.eventsCount * (8 /* fixed64 */ + 10 /* max varint */));
|
||||
long sMark = proto.startField(PROFILE_samples, maxLengthBytes);
|
||||
|
||||
proto.field(SAMPLE_stack_index, stacksIndexCache.computeIfAbsent(stackTraceId, key -> stacksPool.index(makeStack(key))));
|
||||
|
||||
String threadName = getThreadName(tid);
|
||||
KeyValue threadNameKv = new KeyValue(threadNameIndex, threadName);
|
||||
proto.field(SAMPLE_attribute_indices, attributesPool.index(threadNameKv));
|
||||
|
||||
long tMark = proto.startField(SAMPLE_timestamps_unix_nano, varintSize(8 * ae.eventsCount));
|
||||
for (int i = 0; i < ae.eventsCount; ++i) {
|
||||
proto.writeFixed64(ae.timestamps[i]);
|
||||
}
|
||||
proto.commitField(tMark);
|
||||
|
||||
long vMark = proto.startField(SAMPLE_values, varintSize(10 * ae.eventsCount));
|
||||
for (int i = 0; i < ae.eventsCount; ++i) {
|
||||
proto.writeLong(ae.values[i]);
|
||||
}
|
||||
proto.commitField(vMark);
|
||||
|
||||
proto.commitField(sMark);
|
||||
}
|
||||
|
||||
private static int varintSize(long value) {
|
||||
return (640 - Long.numberOfLeadingZeros(value | 1) * 9) / 64;
|
||||
}
|
||||
|
||||
private void writeProfileDictionary() {
|
||||
@@ -150,65 +238,6 @@ public class JfrToOtlp extends JfrConverter {
|
||||
}
|
||||
}
|
||||
|
||||
private static final class SampleInfo {
|
||||
final long timeNanos;
|
||||
final int threadNameAttributeIndex;
|
||||
final int stackIndex;
|
||||
final long samples;
|
||||
final long value;
|
||||
|
||||
SampleInfo(long timeNanos, int threadNameAttributeIndex, int stackIndex, long samples, long value) {
|
||||
this.timeNanos = timeNanos;
|
||||
this.threadNameAttributeIndex = threadNameAttributeIndex;
|
||||
this.stackIndex = stackIndex;
|
||||
this.samples = samples;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
private final class OtlpEventToSampleVisitor implements EventCollector.Visitor {
|
||||
private final List<SampleInfo> samplesInfo;
|
||||
// Chunk-private cache to remember mappings from stacktrace ID to OTLP stack index
|
||||
private final Map<Integer, Integer> stacksIndexCache = new HashMap<>();
|
||||
private final double factor = counterFactor();
|
||||
|
||||
public OtlpEventToSampleVisitor(List<SampleInfo> samplesInfo) {
|
||||
this.samplesInfo = samplesInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(Event event, long samples, long value) {
|
||||
if (excludeStack(event.stackTraceId, event.tid, 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String threadName = getThreadName(event.tid);
|
||||
KeyValue threadNameKv = new KeyValue(threadNameIndex, threadName);
|
||||
int stackIndex = stacksIndexCache.computeIfAbsent(event.stackTraceId, key -> stacksPool.index(makeStack(key)));
|
||||
long nanosFromStart = (long) ((event.time - jfr.chunkStartTicks) * jfr.nanosPerTick);
|
||||
long timeNanos = jfr.chunkStartNanos + nanosFromStart;
|
||||
SampleInfo si = new SampleInfo(timeNanos, attributesPool.index(threadNameKv), stackIndex, samples,
|
||||
factor == 1.0 ? value : (long) (value * factor));
|
||||
samplesInfo.add(si);
|
||||
}
|
||||
|
||||
private IntArray makeStack(int stackTraceId) {
|
||||
StackTrace st = jfr.stackTraces.get(stackTraceId);
|
||||
int[] stack = new int[st.methods.length];
|
||||
for (int i = 0; i < st.methods.length; ++i) {
|
||||
stack[i] = linePool.index(makeLine(st, i));
|
||||
}
|
||||
return new IntArray(stack);
|
||||
}
|
||||
|
||||
private Line makeLine(StackTrace stackTrace, int i) {
|
||||
String methodName = getMethodName(stackTrace.methods[i], stackTrace.types[i]);
|
||||
int lineNumber = stackTrace.locations[i] >>> 16;
|
||||
int functionIdx = functionPool.index(methodName);
|
||||
return new Line(functionIdx, lineNumber);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Line {
|
||||
static final Line EMPTY = new Line(0, 0);
|
||||
|
||||
@@ -285,4 +314,21 @@ public class JfrToOtlp extends JfrConverter {
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class AggregatedEvent {
|
||||
long[] timestamps = new long[1];
|
||||
long[] values = new long[1];
|
||||
int eventsCount = 0;
|
||||
|
||||
public void recordEvent(long timestamp, long value) {
|
||||
if (eventsCount == timestamps.length) {
|
||||
int newSize = timestamps.length * 2;
|
||||
timestamps = Arrays.copyOf(timestamps, newSize);
|
||||
values = Arrays.copyOf(values, newSize);
|
||||
}
|
||||
timestamps[eventsCount] = timestamp;
|
||||
values[eventsCount] = value;
|
||||
++eventsCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,11 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import one.convert.*;
|
||||
package one.convert;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
public class Main {
|
||||
@@ -18,7 +19,7 @@ public class Main {
|
||||
return;
|
||||
}
|
||||
|
||||
if (args.files.size() == 1) {
|
||||
if (args.files.size() == (args.diff ? 2 : 1)) {
|
||||
args.files.add(".");
|
||||
}
|
||||
|
||||
@@ -35,6 +36,34 @@ public class Main {
|
||||
}
|
||||
}
|
||||
|
||||
if (args.diff) {
|
||||
if (fileCount != 2) {
|
||||
throw new IllegalArgumentException("--diff option requires two input files");
|
||||
}
|
||||
if (!"html".equals(args.output) && !"collapsed".equals(args.output)) {
|
||||
throw new IllegalArgumentException("--diff option requires html or collapsed output format");
|
||||
}
|
||||
|
||||
args.norm = true; // don't let random IDs in class names spoil comparison
|
||||
|
||||
String input1 = args.files.get(0);
|
||||
String input2 = args.files.get(1);
|
||||
String output = isDirectory ? new File(lastFile, replaceExt(input2, "diff." + args.output)).getPath() : lastFile;
|
||||
|
||||
System.out.print("Converting " + getFileName(input2) + " vs " + getFileName(input1) + " -> " + getFileName(output) + " ");
|
||||
System.out.flush();
|
||||
|
||||
long startTime = System.nanoTime();
|
||||
FlameGraph base = parseFlameGraph(input1, args);
|
||||
FlameGraph current = parseFlameGraph(input2, args);
|
||||
current.diff(base);
|
||||
current.dump(new FileOutputStream(output));
|
||||
long endTime = System.nanoTime();
|
||||
|
||||
System.out.print("# " + (endTime - startTime) / 1000000 / 1000.0 + " s\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < fileCount; i++) {
|
||||
String input = args.files.get(i);
|
||||
String output = isDirectory ? new File(lastFile, replaceExt(input, args.output)).getPath() : lastFile;
|
||||
@@ -106,6 +135,7 @@ public class Main {
|
||||
" -o --output FORMAT Output format: html, collapsed, pprof, pb.gz, heatmap, otlp\n" +
|
||||
" -I --include REGEX Include only stacks with the specified frames\n" +
|
||||
" -X --exclude REGEX Exclude stacks with the specified frames\n" +
|
||||
" --diff Create differential Flame Graph from two input files\n" +
|
||||
"\n" +
|
||||
"JFR options:\n" +
|
||||
" --cpu CPU profile (ExecutionSample)\n" +
|
||||
@@ -12,6 +12,7 @@ import java.util.Arrays;
|
||||
*/
|
||||
public class Dictionary<T> {
|
||||
private static final int INITIAL_CAPACITY = 16;
|
||||
private static final long USED_BIT = 1L << 63;
|
||||
|
||||
private long[] keys;
|
||||
private Object[] values;
|
||||
@@ -37,12 +38,12 @@ public class Dictionary<T> {
|
||||
}
|
||||
|
||||
// key[i]==0 is used to signal that the i-th position is unset.
|
||||
// Thus, we map key=key+1, so the user can still use key=0.
|
||||
// Thus, we flip USED_BIT, so the user can still use key=0.
|
||||
private static long remapKey(long key) {
|
||||
if (key < 0) {
|
||||
throw new IllegalArgumentException("Negative keys not allowed");
|
||||
if (key == USED_BIT) {
|
||||
throw new IllegalArgumentException("Key not allowed");
|
||||
}
|
||||
return key + 1;
|
||||
return key ^ USED_BIT;
|
||||
}
|
||||
|
||||
public void put(long key, T value) {
|
||||
@@ -82,7 +83,7 @@ public class Dictionary<T> {
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
if (keys[i] != 0) {
|
||||
// Map key back, see remapKey
|
||||
visitor.visit(keys[i] - 1, (T) values[i]);
|
||||
visitor.visit(keys[i] ^ USED_BIT, (T) values[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,12 +211,14 @@ public class JfrReader implements Closeable {
|
||||
} else {
|
||||
Constructor<? extends Event> customEvent = customEvents.get(type);
|
||||
if (customEvent != null && (cls == null || cls == customEvent.getDeclaringClass())) {
|
||||
long eventEnd = filePosition + pos + size;
|
||||
ensureBytes(size - (buf.position() - pos));
|
||||
try {
|
||||
return (E) customEvent.newInstance(this);
|
||||
} catch (ReflectiveOperationException e) {
|
||||
throw new IllegalStateException(e);
|
||||
} finally {
|
||||
seek(filePosition + pos + size);
|
||||
seek(eventEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,14 +42,14 @@ static int pthread_setspecific_hook(pthread_key_t key, const void* value) {
|
||||
}
|
||||
|
||||
void CpuEngine::onThreadStart() {
|
||||
CpuEngine* current = __atomic_load_n(&_current, __ATOMIC_ACQUIRE);
|
||||
CpuEngine* current = loadAcquire(_current);
|
||||
if (current != NULL) {
|
||||
current->createForThread(OS::threadId());
|
||||
}
|
||||
}
|
||||
|
||||
void CpuEngine::onThreadEnd() {
|
||||
CpuEngine* current = __atomic_load_n(&_current, __ATOMIC_ACQUIRE);
|
||||
CpuEngine* current = loadAcquire(_current);
|
||||
if (current != NULL) {
|
||||
current->destroyForThread(OS::threadId());
|
||||
}
|
||||
@@ -80,12 +80,12 @@ bool CpuEngine::setupThreadHook() {
|
||||
|
||||
void CpuEngine::enableThreadHook() {
|
||||
*_pthread_entry = (void*)pthread_setspecific_hook;
|
||||
__atomic_store_n(&_current, this, __ATOMIC_RELEASE);
|
||||
storeRelease(_current, this);
|
||||
}
|
||||
|
||||
void CpuEngine::disableThreadHook() {
|
||||
*_pthread_entry = (void*)pthread_setspecific;
|
||||
__atomic_store_n(&_current, NULL, __ATOMIC_RELEASE);
|
||||
storeRelease(_current, nullptr);
|
||||
}
|
||||
|
||||
bool CpuEngine::isResourceLimit(int err) {
|
||||
@@ -124,9 +124,8 @@ void CpuEngine::signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext) {
|
||||
if (!_enabled) return;
|
||||
|
||||
J9StackTraceNotification notif;
|
||||
StackContext java_ctx;
|
||||
notif.num_frames = _cstack == CSTACK_NO ? 0 : _cstack == CSTACK_DWARF
|
||||
? StackWalker::walkDwarf(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &java_ctx)
|
||||
: StackWalker::walkFP(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &java_ctx);
|
||||
? StackWalker::walkDwarf(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES)
|
||||
: StackWalker::walkFP(ucontext, notif.addr, MAX_J9_NATIVE_FRAMES);
|
||||
J9StackTraces::checkpoint(_interval, ¬if);
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ static jmethodID _stop_method;
|
||||
static jmethodID _box_method;
|
||||
static bool _jfr_starting = false;
|
||||
|
||||
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr", "vm"};
|
||||
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "vm"};
|
||||
|
||||
|
||||
struct CpuTime {
|
||||
@@ -539,28 +539,22 @@ class Recording {
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char* getFeaturesString(char* str, size_t size, StackWalkFeatures& f) {
|
||||
snprintf(str, size, "%s %s %s %s %s %s %s %s %s %s %s %s %s",
|
||||
f.unknown_java ? "unknown_java" : "-",
|
||||
f.unwind_stub ? "unwind_stub" : "-",
|
||||
f.unwind_comp ? "unwind_comp" : "-",
|
||||
f.unwind_native ? "unwind_native" : "-",
|
||||
f.java_anchor ? "java_anchor" : "-",
|
||||
f.gc_traces ? "gc_traces" : "-",
|
||||
f.stats ? "stats" : "-",
|
||||
f.jnienv ? "jnienv" : "-",
|
||||
f.probe_sp ? "probesp" : "-",
|
||||
f.mixed ? "mixed" : "-",
|
||||
f.vtable_target ? "vtable" : "-",
|
||||
f.comp_task ? "comptask" : "-",
|
||||
f.pc_addr ? "pcaddr" : "-");
|
||||
return str;
|
||||
static const char* getFeaturesString(char* str, size_t size, StackWalkFeatures f) {
|
||||
int chars = snprintf(str, size, "%s%s%s%s%s%s%s",
|
||||
f.stats ? ",stats" : "",
|
||||
f.jnienv ? ",jnienv" : "",
|
||||
f.agct ? ",agct" : "",
|
||||
f.mixed ? ",mixed" : "",
|
||||
f.vtable_target ? ",vtable" : "",
|
||||
f.comp_task ? ",comptask" : "",
|
||||
f.pc_addr ? ",pcaddr" : "");
|
||||
return chars > 0 ? str + 1 : "";
|
||||
}
|
||||
|
||||
void flush(Buffer* buf) {
|
||||
ssize_t result = write(_in_memory ? _memfd : _fd, buf->data(), buf->offset());
|
||||
if (result > 0) {
|
||||
atomicInc(_bytes_written, result);
|
||||
atomicInc(_bytes_written, (u64)result);
|
||||
}
|
||||
buf->reset();
|
||||
}
|
||||
@@ -650,6 +644,7 @@ class Recording {
|
||||
writeIntSetting(buf, T_ACTIVE_RECORDING, "jfropts", args._jfr_options);
|
||||
writeIntSetting(buf, T_ACTIVE_RECORDING, "chunksize", args._chunk_size);
|
||||
writeIntSetting(buf, T_ACTIVE_RECORDING, "chunktime", args._chunk_time);
|
||||
writeIntSetting(buf, T_ACTIVE_RECORDING, "memlimit", args._mem_limit);
|
||||
|
||||
char str[256];
|
||||
writeStringSetting(buf, T_ACTIVE_RECORDING, "features", getFeaturesString(str, sizeof(str), args._features));
|
||||
@@ -1367,7 +1362,7 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
|
||||
jclass cls = env->DefineClass(JFR_SYNC_NAME, NULL, (const jbyte*)JFR_SYNC_CLASS, INCBIN_SIZEOF(JFR_SYNC_CLASS));
|
||||
if (cls == NULL || env->RegisterNatives(cls, &native_method, 1) != 0
|
||||
|| (_start_method = env->GetStaticMethodID(cls, "start", "(Ljava/lang/String;Ljava/lang/String;I)V")) == NULL
|
||||
|| (_stop_method = env->GetStaticMethodID(cls, "stop", "()V")) == NULL
|
||||
|| (_stop_method = env->GetStaticMethodID(cls, "stop", "()Z")) == NULL
|
||||
|| (_box_method = env->GetStaticMethodID(cls, "box", "(I)Ljava/lang/Integer;")) == NULL
|
||||
|| (_jfr_sync_class = (jclass)env->NewGlobalRef(cls)) == NULL) {
|
||||
env->ExceptionDescribe();
|
||||
@@ -1403,9 +1398,9 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
|
||||
int event_mask = args.eventMask() |
|
||||
((args._jfr_options ^ JFR_SYNC_OPTS) << EVENT_MASK_SIZE);
|
||||
|
||||
__atomic_store_n(&_jfr_starting, true, __ATOMIC_RELEASE);
|
||||
storeRelease(_jfr_starting, true);
|
||||
env->CallStaticVoidMethod(_jfr_sync_class, _start_method, jfilename, jsettings, event_mask);
|
||||
__atomic_store_n(&_jfr_starting, false, __ATOMIC_RELEASE);
|
||||
storeRelease(_jfr_starting, false);
|
||||
|
||||
if (env->ExceptionCheck()) {
|
||||
env->ExceptionDescribe();
|
||||
@@ -1417,7 +1412,9 @@ Error FlightRecorder::startMasterRecording(Arguments& args, const char* filename
|
||||
|
||||
void FlightRecorder::stopMasterRecording() {
|
||||
JNIEnv* env = VM::jni();
|
||||
env->CallStaticVoidMethod(_jfr_sync_class, _stop_method);
|
||||
if (env->CallStaticBooleanMethod(_jfr_sync_class, _stop_method) == JNI_FALSE) {
|
||||
Log::warn("Failed to stop JFR recording");
|
||||
}
|
||||
env->ExceptionClear();
|
||||
}
|
||||
|
||||
@@ -1496,5 +1493,5 @@ void FlightRecorder::recordLog(LogLevel level, const char* message, size_t len)
|
||||
}
|
||||
|
||||
bool FlightRecorder::isJfrStarting() {
|
||||
return __atomic_load_n(&_jfr_starting, __ATOMIC_ACQUIRE);
|
||||
return loadAcquire(_jfr_starting);
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -16,6 +16,7 @@ import java.nio.file.NoSuchFileException;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParseException;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.concurrent.locks.LockSupport;
|
||||
|
||||
/**
|
||||
* Synchronize async-profiler recording with an existing JFR recording.
|
||||
@@ -77,13 +78,24 @@ class JfrSync implements FlightRecorderListener {
|
||||
recording.start();
|
||||
}
|
||||
|
||||
public static void stop() {
|
||||
public static boolean stop() {
|
||||
Recording recording = masterRecording;
|
||||
if (recording != null) {
|
||||
// Disable state change notification before stopping
|
||||
masterRecording = null;
|
||||
recording.stop();
|
||||
try {
|
||||
recording.stop();
|
||||
} catch (IllegalStateException e) {
|
||||
// Workaround the JDK issue: JFR shutdown hook may stop the recording concurrently
|
||||
// then populate the target file outside the state lock.
|
||||
// Once the file is completely written, the recording state is changed to CLOSED.
|
||||
for (int pause = 10; recording.getState() != RecordingState.CLOSED && pause < 1000; pause *= 2) {
|
||||
LockSupport.parkNanos(pause * 1_000_000L);
|
||||
}
|
||||
return recording.getState() == RecordingState.CLOSED;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static void disableBuiltinEvents(Recording recording, int eventMask) {
|
||||
|
||||
Binary file not shown.
@@ -17,7 +17,7 @@ import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
class Server extends Thread implements Executor, HttpHandler {
|
||||
private static final String[] COMMANDS = "start,resume,stop,dump,check,status,metrics,list,version".split(",");
|
||||
private static final String[] COMMANDS = "start,resume,stop,dump,status,metrics,list,version".split(",");
|
||||
|
||||
private final HttpServer server;
|
||||
private final AtomicInteger threadNum = new AtomicInteger();
|
||||
|
||||
@@ -38,7 +38,7 @@ class J9VMThread {
|
||||
}
|
||||
|
||||
void setOverflowMark() {
|
||||
__atomic_store_n(&_overflow_mark, (uintptr_t)-1, __ATOMIC_RELEASE);
|
||||
storeRelease(_overflow_mark, (uintptr_t)-1);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -47,7 +47,7 @@ pthread_t J9StackTraces::_thread = 0;
|
||||
int J9StackTraces::_max_stack_depth;
|
||||
int J9StackTraces::_pipe[2];
|
||||
|
||||
static JNIEnv* _self_env = NULL;
|
||||
static JNIEnv* _self_env = nullptr;
|
||||
|
||||
|
||||
Error J9StackTraces::start(Arguments& args) {
|
||||
@@ -78,7 +78,7 @@ void J9StackTraces::stop() {
|
||||
|
||||
void J9StackTraces::timerLoop() {
|
||||
JNIEnv* jni = VM::attachThread("Async-profiler Sampler");
|
||||
__atomic_store_n(&_self_env, jni, __ATOMIC_RELEASE);
|
||||
storeRelease(_self_env, jni);
|
||||
|
||||
jni->PushLocalFrame(64);
|
||||
|
||||
@@ -145,13 +145,13 @@ void J9StackTraces::timerLoop() {
|
||||
free(jvmti_frames);
|
||||
free(frames);
|
||||
|
||||
__atomic_store_n(&_self_env, NULL, __ATOMIC_RELEASE);
|
||||
storeRelease(_self_env, nullptr);
|
||||
VM::detachThread();
|
||||
}
|
||||
|
||||
void J9StackTraces::checkpoint(u64 counter, J9StackTraceNotification* notif) {
|
||||
JNIEnv* self_env = __atomic_load_n(&_self_env, __ATOMIC_ACQUIRE);
|
||||
if (self_env == NULL) {
|
||||
JNIEnv* self_env = loadAcquire(_self_env);
|
||||
if (self_env == nullptr) {
|
||||
// Sampler thread is not ready
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
LinearAllocator::LinearAllocator(size_t chunk_size) {
|
||||
_chunk_size = chunk_size;
|
||||
_used_memory = chunk_size;
|
||||
_reserve = _tail = allocateChunk(NULL);
|
||||
}
|
||||
|
||||
@@ -28,14 +29,7 @@ void LinearAllocator::clear() {
|
||||
}
|
||||
_reserve = _tail;
|
||||
_tail->offs = sizeof(Chunk);
|
||||
}
|
||||
|
||||
size_t LinearAllocator::usedMemory() {
|
||||
size_t bytes = _reserve->prev == _tail ? _chunk_size : 0;
|
||||
for (Chunk* chunk = _tail; chunk != NULL; chunk = chunk->prev) {
|
||||
bytes += _chunk_size;
|
||||
}
|
||||
return bytes;
|
||||
_used_memory = _chunk_size;
|
||||
}
|
||||
|
||||
void* LinearAllocator::alloc(size_t size) {
|
||||
@@ -99,5 +93,11 @@ Chunk* LinearAllocator::getNextChunk(Chunk* current) {
|
||||
|
||||
// Expected case: a new chunk is already reserved
|
||||
Chunk* tail = __sync_val_compare_and_swap(&_tail, current, reserve);
|
||||
return tail == current ? reserve : tail;
|
||||
if (tail == current) {
|
||||
// CAS succeeded: switched to a new chunk
|
||||
atomicInc(_used_memory, _chunk_size);
|
||||
return reserve;
|
||||
}
|
||||
// Lost the race to another thread
|
||||
return tail;
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ struct Chunk {
|
||||
class LinearAllocator {
|
||||
private:
|
||||
size_t _chunk_size;
|
||||
size_t _used_memory;
|
||||
Chunk* _tail;
|
||||
Chunk* _reserve;
|
||||
|
||||
@@ -31,8 +32,11 @@ class LinearAllocator {
|
||||
LinearAllocator(size_t chunk_size);
|
||||
~LinearAllocator();
|
||||
|
||||
size_t usedMemory() const {
|
||||
return _used_memory;
|
||||
}
|
||||
|
||||
void clear();
|
||||
size_t usedMemory();
|
||||
|
||||
void* alloc(size_t size);
|
||||
};
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "demangle.h"
|
||||
#include "dictionary.h"
|
||||
#include "index.h"
|
||||
|
||||
@@ -54,6 +54,7 @@ static const char USAGE_STRING[] =
|
||||
" -g, --sig print method signatures\n"
|
||||
" -a, --ann annotate Java methods\n"
|
||||
" -l, --lib prepend library names\n"
|
||||
" --dot dotted class names\n"
|
||||
" -o fmt output format: flat|traces|collapsed|flamegraph|tree|jfr|otlp\n"
|
||||
" -I include output only stack traces containing the specified pattern\n"
|
||||
" -X exclude exclude stack traces with the specified pattern\n"
|
||||
@@ -69,6 +70,7 @@ static const char USAGE_STRING[] =
|
||||
"\n"
|
||||
" --loop time run profiler in a loop\n"
|
||||
" --alloc bytes allocation profiling interval in bytes\n"
|
||||
" --tlab use TLAB events for allocation profiling\n"
|
||||
" --live build allocation profile from live objects only\n"
|
||||
" --nativemem bytes native allocation profiling interval in bytes\n"
|
||||
" --nofree do not collect free calls in native allocation profiling\n"
|
||||
@@ -76,19 +78,21 @@ static const char USAGE_STRING[] =
|
||||
" --lock time lock profiling threshold in nanoseconds\n"
|
||||
" --nativelock time pthread mutex/rwlock profiling threshold in nanoseconds\n"
|
||||
" --wall interval wall clock profiling interval\n"
|
||||
" --nobatch legacy wall clock sampling without batch events\n"
|
||||
" --proc interval process sampling interval (default: 30s)\n"
|
||||
" --all shorthand for enabling cpu, wall, alloc, live,\n"
|
||||
" nativemem and lock profiling simultaneously\n"
|
||||
" --total accumulate the total value (time, bytes, etc.)\n"
|
||||
" --all-user only include user-mode events\n"
|
||||
" --sched group threads by scheduling policy\n"
|
||||
" --cstack mode how to traverse C stack: fp|dwarf|lbr|vm|no\n"
|
||||
" --cstack mode how to traverse C stack: fp|dwarf|vm|no\n"
|
||||
" --signal num use alternative signal for cpu or wall clock profiling\n"
|
||||
" --clock source clock source for JFR timestamps: tsc|monotonic\n"
|
||||
" --begin function begin profiling when function is executed\n"
|
||||
" --end function end profiling when function is executed\n"
|
||||
" --ttsp only time-to-safepoint profiling \n"
|
||||
" --nostop do not stop profiling outside --begin/--end window\n"
|
||||
" --memlimit bytes limit size of the stack trace storage\n"
|
||||
" --jfropts opts JFR recording options: mem\n"
|
||||
" --jfrsync config synchronize profiler with JFR recording\n"
|
||||
" --libpath path full path to libasyncProfiler.so in the container\n"
|
||||
@@ -412,7 +416,7 @@ int main(int argc, const char** argv) {
|
||||
while (args.count() > 0 && !(jattach_action && pid)) {
|
||||
String arg = args.next();
|
||||
|
||||
if (arg == "start" || arg == "resume" || arg == "stop" || arg == "dump" || arg == "check" ||
|
||||
if (arg == "start" || arg == "resume" || arg == "stop" || arg == "dump" ||
|
||||
arg == "status" || arg == "metrics" || arg == "list" || arg == "collect") {
|
||||
action = arg;
|
||||
|
||||
@@ -500,31 +504,22 @@ int main(int argc, const char** argv) {
|
||||
} else if (arg == "--width" || arg == "--height" || arg == "--minwidth") {
|
||||
format << "," << (arg.str() + 2) << "=" << args.next();
|
||||
|
||||
} else if (arg == "--reverse" || arg == "--inverted" || arg == "--samples" || arg == "--total" ||
|
||||
arg == "--sched" || arg == "--live" || arg == "--nofree" || arg == "--record-cpu") {
|
||||
} else if (arg == "--reverse" || arg == "--inverted" || arg == "--samples" || arg == "--total") {
|
||||
format << "," << (arg.str() + 2);
|
||||
|
||||
} else if (arg == "--alloc" || arg == "--nativemem" || arg == "--nativelock" || arg == "--lock" ||
|
||||
arg == "--wall" || arg == "--trace" || arg == "--chunksize" || arg == "--chunktime" ||
|
||||
arg == "--cstack" || arg == "--signal" || arg == "--clock" || arg == "--begin" || arg == "--end" ||
|
||||
arg == "--target-cpu" || arg == "--proc") {
|
||||
arg == "--target-cpu" || arg == "--proc" || arg == "--memlimit") {
|
||||
params << "," << (arg.str() + 2) << "=" << args.next();
|
||||
|
||||
} else if (arg == "--ttsp") {
|
||||
params << ",ttsp";
|
||||
|
||||
} else if (arg == "--nostop") {
|
||||
params << ",nostop";
|
||||
|
||||
} else if (arg == "--all") {
|
||||
params << ",all";
|
||||
} else if (arg == "--all" || arg == "--live" || arg == "--nobatch" || arg == "--nofree" || arg == "--nostop" ||
|
||||
arg == "--record-cpu" || arg == "--sched" || arg == "--tlab" || arg == "--ttsp") {
|
||||
params << "," << (arg.str() + 2);
|
||||
|
||||
} else if (arg == "--all-user") {
|
||||
params << ",alluser";
|
||||
|
||||
} else if (arg == "--safe-mode") {
|
||||
params << ",safemode=" << args.next();
|
||||
|
||||
} else if (arg == "--jfrsync" || arg == "--jfropts") {
|
||||
params << "," << (arg.str() + 2) << "=" << args.next();
|
||||
output = "jfr";
|
||||
|
||||
@@ -213,7 +213,7 @@ ThreadState OS::threadState(int thread_id) {
|
||||
|
||||
ThreadState state = THREAD_UNKNOWN;
|
||||
if (read(fd, buf, sizeof(buf)) > 0) {
|
||||
char* s = strchr(buf, ')');
|
||||
char* s = strrchr(buf, ')');
|
||||
state = s != NULL && (s[2] == 'R' || s[2] == 'D') ? THREAD_RUNNING : THREAD_SLEEPING;
|
||||
}
|
||||
|
||||
|
||||
@@ -374,8 +374,9 @@ int OS::createMemoryFile(const char* name) {
|
||||
}
|
||||
|
||||
void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) {
|
||||
char* buf = (char*)mmap(NULL, size + offset, PROT_READ, MAP_PRIVATE, src_fd, 0);
|
||||
if (buf == NULL) {
|
||||
size_t map_size = size + offset;
|
||||
char* buf = (char*)mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, src_fd, 0);
|
||||
if (buf == MAP_FAILED) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -388,7 +389,7 @@ void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) {
|
||||
size -= (size_t)bytes;
|
||||
}
|
||||
|
||||
munmap(buf, offset);
|
||||
munmap(buf, map_size);
|
||||
}
|
||||
|
||||
void OS::freePageCache(int fd, off_t start_offset) {
|
||||
|
||||
@@ -45,7 +45,7 @@ class PerfEvents : public CpuEngine {
|
||||
const char* title();
|
||||
const char* units();
|
||||
|
||||
static int walk(int tid, void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx);
|
||||
static int walk(int tid, void* ucontext, const void** callchain, int max_depth, u64* cpu);
|
||||
static void resetBuffer(int tid);
|
||||
|
||||
static bool supported();
|
||||
@@ -62,7 +62,7 @@ class PerfEvents : public CpuEngine {
|
||||
return Error("PerfEvents are not supported on this platform");
|
||||
}
|
||||
|
||||
static int walk(int tid, void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) {
|
||||
static int walk(int tid, void* ucontext, const void** callchain, int max_depth, u64* cpu) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -58,6 +58,23 @@ enum {
|
||||
HW_BREAKPOINT_X = 4
|
||||
};
|
||||
|
||||
struct PerfCounter {
|
||||
u64 value;
|
||||
u64 time_enabled; /* PERF_FORMAT_TOTAL_TIME_ENABLED */
|
||||
u64 time_running; /* PERF_FORMAT_TOTAL_TIME_RUNNING */
|
||||
};
|
||||
|
||||
// Per-FD struct for storing perf-event multiplexing data
|
||||
struct MultiplexState {
|
||||
u64 time_enabled; /* stores previous time_enabled */
|
||||
u64 time_running; /* stores previous time_running */
|
||||
};
|
||||
|
||||
static const unsigned int MAX_MULTIPLEXED_FD = 65536;
|
||||
|
||||
static MultiplexState multiplex_state[MAX_MULTIPLEXED_FD];
|
||||
static bool multiplex_state_dirty = false;
|
||||
|
||||
static int fetchInt(const char* file_name) {
|
||||
int fd = open(file_name, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
@@ -587,6 +604,9 @@ int PerfEvents::createForThread(int tid) {
|
||||
attr.disabled = 1;
|
||||
attr.wakeup_events = 1;
|
||||
|
||||
// flags for multiplexing support
|
||||
attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
|
||||
|
||||
if (_alluser) {
|
||||
attr.exclude_kernel = 1;
|
||||
}
|
||||
@@ -599,16 +619,6 @@ int PerfEvents::createForThread(int tid) {
|
||||
attr.exclude_callchain_user = 1;
|
||||
}
|
||||
|
||||
#ifdef PERF_ATTR_SIZE_VER5
|
||||
if (_cstack == CSTACK_LBR) {
|
||||
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_REGS_USER;
|
||||
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK;
|
||||
attr.sample_regs_user = 1ULL << PERF_REG_PC;
|
||||
}
|
||||
#else
|
||||
#warning "Compiling without LBR support. Kernel headers 4.1+ required"
|
||||
#endif
|
||||
|
||||
if (_record_cpu) {
|
||||
attr.sample_type |= PERF_SAMPLE_CPU;
|
||||
}
|
||||
@@ -648,6 +658,11 @@ int PerfEvents::createForThread(int tid) {
|
||||
_events[tid]._fd = fd;
|
||||
_events[tid]._page = (struct perf_event_mmap_page*)page;
|
||||
|
||||
if (multiplex_state_dirty && fd < MAX_MULTIPLEXED_FD) {
|
||||
multiplex_state[fd].time_enabled = 0;
|
||||
multiplex_state[fd].time_running = 0;
|
||||
}
|
||||
|
||||
struct f_owner_ex ex;
|
||||
ex.type = F_OWNER_TID;
|
||||
ex.pid = tid;
|
||||
@@ -700,8 +715,36 @@ u64 PerfEvents::readCounter(siginfo_t* siginfo, void* ucontext) {
|
||||
case 3: return StackFrame(ucontext).arg2();
|
||||
case 4: return StackFrame(ucontext).arg3();
|
||||
default: {
|
||||
u64 counter;
|
||||
return read(siginfo->si_fd, &counter, sizeof(counter)) == sizeof(counter) ? counter : 1;
|
||||
// Read counter with multiplexing metadata for accurate scaling
|
||||
struct PerfCounter counter;
|
||||
if (read(siginfo->si_fd, &counter, sizeof(counter)) == sizeof(counter)) {
|
||||
u64 current_val = counter.value;
|
||||
if (counter.time_enabled > counter.time_running) {
|
||||
int fd = siginfo->si_fd;
|
||||
if (fd < MAX_MULTIPLEXED_FD) {
|
||||
u64 delta_enabled = counter.time_enabled - multiplex_state[fd].time_enabled;
|
||||
u64 delta_running = counter.time_running - multiplex_state[fd].time_running;
|
||||
|
||||
multiplex_state[fd].time_enabled = counter.time_enabled;
|
||||
multiplex_state[fd].time_running = counter.time_running;
|
||||
|
||||
if (!multiplex_state_dirty) {
|
||||
multiplex_state_dirty = true;
|
||||
}
|
||||
|
||||
if (delta_running > 0 && delta_enabled > delta_running) {
|
||||
// scaled counter = (counter) * (delta_enabled / delta_running)
|
||||
double ratio = (double)delta_enabled / delta_running;
|
||||
return (u64)(current_val * ratio);
|
||||
}
|
||||
} else if (counter.time_running > 0) {
|
||||
double ratio = (double)counter.time_enabled / counter.time_running;
|
||||
return (u64)(current_val * ratio);
|
||||
}
|
||||
}
|
||||
return current_val;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -741,8 +784,8 @@ void PerfEvents::signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext)
|
||||
if (_enabled) {
|
||||
u64 counter = readCounter(siginfo, ucontext);
|
||||
J9StackTraceNotification notif;
|
||||
StackContext java_ctx;
|
||||
notif.num_frames = _cstack == CSTACK_NO ? 0 : walk(OS::threadId(), ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &java_ctx);
|
||||
u64 cpu = 0;
|
||||
notif.num_frames = _cstack == CSTACK_NO ? 0 : walk(OS::threadId(), ucontext, notif.addr, MAX_J9_NATIVE_FRAMES, &cpu);
|
||||
J9StackTraces::checkpoint(counter, &notif);
|
||||
} else {
|
||||
resetBuffer(OS::threadId());
|
||||
@@ -799,7 +842,7 @@ Error PerfEvents::start(Arguments& args) {
|
||||
// Automatically switch on alluser for non-CPU events, if kernel profiling is unavailable
|
||||
_alluser = strcmp(args._event, EVENT_CPU) != 0 && !supported();
|
||||
}
|
||||
_use_perf_mmap = _kernel_stack || _cstack == CSTACK_DEFAULT || _cstack == CSTACK_LBR || _record_cpu;
|
||||
_use_perf_mmap = _kernel_stack || _cstack == CSTACK_DEFAULT || _record_cpu;
|
||||
|
||||
if (strcmp(_event_type->name, "cpu-clock") == 0 && hasPerfEventRefreshBug()) {
|
||||
Log::debug("Enable workaround for PERF_EVENT_IOC_REFRESH bug");
|
||||
@@ -853,7 +896,7 @@ void PerfEvents::stop() {
|
||||
J9StackTraces::stop();
|
||||
}
|
||||
|
||||
int PerfEvents::walk(int tid, void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) {
|
||||
int PerfEvents::walk(int tid, void* ucontext, const void** callchain, int max_depth, u64* cpu) {
|
||||
PerfEvent* event = &_events[tid];
|
||||
if (!event->tryLock()) {
|
||||
return 0; // the event is being destroyed
|
||||
@@ -874,7 +917,7 @@ int PerfEvents::walk(int tid, void* ucontext, const void** callchain, int max_de
|
||||
|
||||
if (hdr->type == PERF_RECORD_SAMPLE) {
|
||||
if (_record_cpu) {
|
||||
java_ctx->cpu = ring.next();
|
||||
*cpu = ring.next();
|
||||
}
|
||||
|
||||
u64 nr = ring.next();
|
||||
@@ -884,43 +927,12 @@ int PerfEvents::walk(int tid, void* ucontext, const void** callchain, int max_de
|
||||
const void* iptr = (const void*)ip;
|
||||
if (CodeHeap::contains(iptr) || depth >= max_depth) {
|
||||
// Stop at the first Java frame
|
||||
java_ctx->pc = iptr;
|
||||
goto stack_complete;
|
||||
}
|
||||
callchain[depth++] = iptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (_cstack == CSTACK_LBR) {
|
||||
u64 bnr = ring.next();
|
||||
|
||||
// Last userspace PC is stored right after branch stack
|
||||
const void* pc = (const void*)ring.peek(bnr * 3 + 2);
|
||||
if (CodeHeap::contains(pc) || depth >= max_depth) {
|
||||
java_ctx->pc = pc;
|
||||
goto stack_complete;
|
||||
}
|
||||
callchain[depth++] = pc;
|
||||
|
||||
while (bnr-- > 0) {
|
||||
const void* from = (const void*)ring.next();
|
||||
const void* to = (const void*)ring.next();
|
||||
ring.next();
|
||||
|
||||
if (CodeHeap::contains(to) || depth >= max_depth) {
|
||||
java_ctx->pc = to;
|
||||
goto stack_complete;
|
||||
}
|
||||
callchain[depth++] = to;
|
||||
|
||||
if (CodeHeap::contains(from) || depth >= max_depth) {
|
||||
java_ctx->pc = from;
|
||||
goto stack_complete;
|
||||
}
|
||||
callchain[depth++] = from;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
tail += hdr->size;
|
||||
@@ -933,9 +945,9 @@ stack_complete:
|
||||
event->unlock();
|
||||
|
||||
if (_cstack == CSTACK_FP) {
|
||||
depth += StackWalker::walkFP(ucontext, callchain + depth, max_depth - depth, java_ctx);
|
||||
depth += StackWalker::walkFP(ucontext, callchain + depth, max_depth - depth);
|
||||
} else if (_cstack == CSTACK_DWARF) {
|
||||
depth += StackWalker::walkDwarf(ucontext, callchain + depth, max_depth - depth, java_ctx);
|
||||
depth += StackWalker::walkDwarf(ucontext, callchain + depth, max_depth - depth);
|
||||
}
|
||||
|
||||
return depth;
|
||||
|
||||
325
src/profiler.cpp
325
src/profiler.cpp
@@ -97,10 +97,6 @@ static inline int hasNativeStack(EventType event_type) {
|
||||
return (1 << event_type) & events_with_native_stack;
|
||||
}
|
||||
|
||||
static inline bool isVTableStub(const char* name) {
|
||||
return name[0] && strcmp(name + 1, "table stub") == 0;
|
||||
}
|
||||
|
||||
static inline int makeFrame(ASGCT_CallFrame* frames, jint type, jmethodID id) {
|
||||
frames[0].bci = type;
|
||||
frames[0].method_id = id;
|
||||
@@ -121,15 +117,16 @@ void Profiler::addJavaMethod(const void* address, int length, jmethodID method)
|
||||
}
|
||||
|
||||
void Profiler::addRuntimeStub(const void* address, int length, const char* name) {
|
||||
if (startsWith(name, "Stub Generator ")) {
|
||||
name += 15; // useless prefix introduced with JDK-8336658
|
||||
} else if (streq(name, "Interpreter")) {
|
||||
CodeHeap::setInterpreterStart(address);
|
||||
}
|
||||
|
||||
_stubs_lock.lock();
|
||||
_runtime_stubs.add(address, length, name, true);
|
||||
_stubs_lock.unlock();
|
||||
|
||||
if (strcmp(name, "call_stub") == 0) {
|
||||
_call_stub_begin = address;
|
||||
_call_stub_end = (const char*)address + length;
|
||||
}
|
||||
|
||||
CodeHeap::updateBounds(address, (const char*)address + length);
|
||||
}
|
||||
|
||||
@@ -293,38 +290,19 @@ CodeBlob* Profiler::findRuntimeStub(const void* address) {
|
||||
return _runtime_stubs.findBlobByAddress(address);
|
||||
}
|
||||
|
||||
bool Profiler::isAddressInCode(const void* pc) {
|
||||
if (CodeHeap::contains(pc)) {
|
||||
return CodeHeap::findNMethod(pc) != NULL && !(pc >= _call_stub_begin && pc < _call_stub_end);
|
||||
} else {
|
||||
return findLibraryByAddress(pc) != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
jmethodID Profiler::getCurrentCompileTask() {
|
||||
VMThread* vm_thread = VMThread::current();
|
||||
if (vm_thread != NULL) {
|
||||
VMMethod* method = vm_thread->compiledMethod();
|
||||
if (method != NULL) {
|
||||
return method->id();
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType event_type, int tid, StackContext* java_ctx) {
|
||||
int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType event_type, int tid, u64* cpu) {
|
||||
const void* callchain[MAX_NATIVE_FRAMES];
|
||||
int native_frames;
|
||||
|
||||
// Use PerfEvents stack walker for execution samples, or basic stack walker for other events
|
||||
if (event_type == PERF_SAMPLE) {
|
||||
native_frames = PerfEvents::walk(tid, ucontext, callchain, MAX_NATIVE_FRAMES, java_ctx);
|
||||
native_frames = PerfEvents::walk(tid, ucontext, callchain, MAX_NATIVE_FRAMES, cpu);
|
||||
} else if (_cstack == CSTACK_VM) {
|
||||
return 0;
|
||||
} else if (_cstack == CSTACK_DWARF) {
|
||||
native_frames = StackWalker::walkDwarf(ucontext, callchain, MAX_NATIVE_FRAMES, java_ctx);
|
||||
native_frames = StackWalker::walkDwarf(ucontext, callchain, MAX_NATIVE_FRAMES);
|
||||
} else {
|
||||
native_frames = StackWalker::walkFP(ucontext, callchain, MAX_NATIVE_FRAMES, java_ctx);
|
||||
native_frames = StackWalker::walkFP(ucontext, callchain, MAX_NATIVE_FRAMES);
|
||||
}
|
||||
|
||||
return convertNativeTrace(native_frames, callchain, frames, event_type);
|
||||
@@ -332,7 +310,6 @@ int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType
|
||||
|
||||
int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGCT_CallFrame* frames, EventType event_type) {
|
||||
int depth = 0;
|
||||
jmethodID prev_method = NULL;
|
||||
|
||||
for (int i = 0; i < native_frames; i++) {
|
||||
const char* current_method_name = findNativeMethod(callchain[i]);
|
||||
@@ -349,32 +326,18 @@ int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGC
|
||||
// This is C++ interpreter frame, this and later frames should be reported
|
||||
// as Java frames returned by AGCT. Terminate the scan here.
|
||||
return depth;
|
||||
} else if (mark == MARK_COMPILER_ENTRY && _features.comp_task) {
|
||||
// Insert current compile task as a pseudo Java frame
|
||||
jmethodID compile_task = getCurrentCompileTask();
|
||||
if (compile_task != NULL) {
|
||||
frames[depth].bci = 0;
|
||||
frames[depth].method_id = compile_task;
|
||||
depth++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
jmethodID current_method = (jmethodID)current_method_name;
|
||||
if (current_method == prev_method && _cstack == CSTACK_LBR) {
|
||||
// Skip duplicates in LBR stack, where branch_stack[N].from == branch_stack[N+1].to
|
||||
prev_method = NULL;
|
||||
} else {
|
||||
frames[depth].bci = BCI_NATIVE_FRAME;
|
||||
frames[depth].method_id = prev_method = current_method;
|
||||
depth++;
|
||||
}
|
||||
frames[depth].bci = BCI_NATIVE_FRAME;
|
||||
frames[depth].method_id = (jmethodID)current_method_name;
|
||||
depth++;
|
||||
}
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackContext* java_ctx) {
|
||||
int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth) {
|
||||
// Workaround for JDK-8132510: it's not safe to call GetEnv() inside a signal handler
|
||||
// since JDK 9, so we do it only for threads already registered in ThreadLocalStorage
|
||||
VMThread* vm_thread = VMThread::current();
|
||||
@@ -393,146 +356,14 @@ int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max
|
||||
return 0;
|
||||
}
|
||||
|
||||
StackFrame frame(ucontext);
|
||||
uintptr_t saved_pc, saved_sp, saved_fp;
|
||||
if (ucontext != NULL) {
|
||||
saved_pc = frame.pc();
|
||||
saved_sp = frame.sp();
|
||||
saved_fp = frame.fp();
|
||||
}
|
||||
|
||||
if (_features.unwind_native && vm_thread->inJava()) {
|
||||
if (saved_pc >= (uintptr_t)_call_stub_begin && saved_pc < (uintptr_t)_call_stub_end) {
|
||||
// call_stub is unsafe to walk
|
||||
frames->bci = BCI_ERROR;
|
||||
frames->method_id = (jmethodID)"call_stub";
|
||||
return 1;
|
||||
}
|
||||
if (DWARF_SUPPORTED && java_ctx->sp != 0) {
|
||||
// If a thread is in Java state, unwind manually to the last known Java frame,
|
||||
// since JVM does not always correctly unwind native frames
|
||||
frame.restore((uintptr_t)java_ctx->pc, java_ctx->sp, java_ctx->fp);
|
||||
}
|
||||
}
|
||||
|
||||
JitWriteProtection jit(false);
|
||||
ASGCT_CallTrace trace = {jni, 0, frames};
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
|
||||
if (trace.num_frames > 0) {
|
||||
frame.restore(saved_pc, saved_sp, saved_fp);
|
||||
return trace.num_frames;
|
||||
}
|
||||
|
||||
if ((trace.num_frames == ticks_unknown_Java || trace.num_frames == ticks_not_walkable_Java) && _features.unknown_java && ucontext != NULL) {
|
||||
CodeBlob* stub = NULL;
|
||||
_stubs_lock.lockShared();
|
||||
if (_runtime_stubs.contains((const void*)frame.pc())) {
|
||||
stub = findRuntimeStub((const void*)frame.pc());
|
||||
}
|
||||
_stubs_lock.unlockShared();
|
||||
|
||||
if (stub != NULL) {
|
||||
if (_cstack != CSTACK_NO) {
|
||||
if (_features.vtable_target && isVTableStub(stub->_name)) {
|
||||
uintptr_t receiver = frame.jarg0();
|
||||
if (receiver != 0) {
|
||||
VMSymbol* symbol = VMKlass::fromOop(receiver)->name();
|
||||
u32 class_id = classMap()->lookup(symbol->body(), symbol->length());
|
||||
max_depth -= makeFrame(trace.frames++, BCI_ALLOC, class_id);
|
||||
}
|
||||
}
|
||||
max_depth -= makeFrame(trace.frames++, BCI_NATIVE_FRAME, stub->_name);
|
||||
}
|
||||
if (_features.unwind_stub && frame.unwindStub((instruction_t*)stub->_start, stub->_name)
|
||||
&& isAddressInCode((const void*)frame.pc())) {
|
||||
java_ctx->pc = (const void*)frame.pc();
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
} else if (VMStructs::hasMethodStructs()) {
|
||||
NMethod* nmethod = CodeHeap::findNMethod((const void*)frame.pc());
|
||||
if (nmethod != NULL && nmethod->isNMethod() && nmethod->isAlive()) {
|
||||
VMMethod* method = nmethod->method();
|
||||
if (method != NULL) {
|
||||
jmethodID method_id = method->id();
|
||||
if (method_id != NULL) {
|
||||
max_depth -= makeFrame(trace.frames++, 0, method_id);
|
||||
}
|
||||
if (_features.unwind_comp && frame.unwindCompiled(nmethod)
|
||||
&& isAddressInCode((const void*)frame.pc())) {
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
if (_features.probe_sp && trace.num_frames < 0) {
|
||||
if (method_id != NULL) {
|
||||
trace.frames--;
|
||||
}
|
||||
for (int i = 0; trace.num_frames < 0 && i < PROBE_SP_LIMIT; i++) {
|
||||
frame.sp() += sizeof(void*);
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (nmethod != NULL) {
|
||||
if (_cstack != CSTACK_NO) {
|
||||
max_depth -= makeFrame(trace.frames++, BCI_NATIVE_FRAME, nmethod->name());
|
||||
}
|
||||
if (_features.unwind_stub && frame.unwindStub(NULL, nmethod->name())
|
||||
&& isAddressInCode((const void*)frame.pc())) {
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (trace.num_frames == ticks_unknown_not_Java && _features.java_anchor) {
|
||||
JavaFrameAnchor* anchor = vm_thread->anchor();
|
||||
uintptr_t sp = anchor->lastJavaSP();
|
||||
const void* pc = anchor->lastJavaPC();
|
||||
if (sp != 0 && pc == NULL) {
|
||||
// We have the last Java frame anchor, but it is not marked as walkable.
|
||||
// Make it walkable here
|
||||
pc = ((const void**)sp)[-1];
|
||||
anchor->setLastJavaPC(pc);
|
||||
|
||||
NMethod* m = CodeHeap::findNMethod(pc);
|
||||
if (m != NULL) {
|
||||
// AGCT fails if the last Java frame is a Runtime Stub with an invalid _frame_complete_offset.
|
||||
// In this case we patch _frame_complete_offset manually
|
||||
if (!m->isNMethod() && m->frameSize() > 0 && m->frameCompleteOffset() == -1) {
|
||||
m->setFrameCompleteOffset(0);
|
||||
}
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
} else if (findLibraryByAddress(pc) != NULL) {
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
|
||||
anchor->setLastJavaPC(NULL);
|
||||
}
|
||||
} else if (trace.num_frames == ticks_not_walkable_not_Java && _features.java_anchor) {
|
||||
JavaFrameAnchor* anchor = vm_thread->anchor();
|
||||
uintptr_t sp = anchor->lastJavaSP();
|
||||
const void* pc = anchor->lastJavaPC();
|
||||
if (sp != 0 && pc != NULL) {
|
||||
// Similar to the above: last Java frame is set,
|
||||
// but points to a Runtime Stub with an invalid _frame_complete_offset
|
||||
NMethod* m = CodeHeap::findNMethod(pc);
|
||||
if (m != NULL && !m->isNMethod() && m->frameSize() > 0 && m->frameCompleteOffset() == -1) {
|
||||
m->setFrameCompleteOffset(0);
|
||||
VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
|
||||
}
|
||||
}
|
||||
} else if (trace.num_frames == ticks_GC_active && _features.gc_traces) {
|
||||
if (vm_thread->anchor()->lastJavaSP() == 0) {
|
||||
// Do not add 'GC_active' for threads with no Java frames, e.g. Compiler threads
|
||||
frame.restore(saved_pc, saved_sp, saved_fp);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
frame.restore(saved_pc, saved_sp, saved_fp);
|
||||
|
||||
if (trace.num_frames > 0) {
|
||||
return trace.num_frames + (trace.frames - frames);
|
||||
}
|
||||
|
||||
const char* err_string = asgctError(trace.num_frames);
|
||||
if (err_string == NULL) {
|
||||
// No Java stack, because thread is not in Java context
|
||||
@@ -540,9 +371,7 @@ int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max
|
||||
}
|
||||
|
||||
atomicInc(_failures[-trace.num_frames]);
|
||||
trace.frames->bci = BCI_ERROR;
|
||||
trace.frames->method_id = (jmethodID)err_string;
|
||||
return trace.frames - frames + 1;
|
||||
return makeFrame(frames, BCI_ERROR, err_string);
|
||||
}
|
||||
|
||||
int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int start_depth, int max_depth) {
|
||||
@@ -560,49 +389,6 @@ int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* f
|
||||
return num_frames;
|
||||
}
|
||||
|
||||
void Profiler::fillFrameTypes(ASGCT_CallFrame* frames, int num_frames, NMethod* nmethod) {
|
||||
if (nmethod->isNMethod() && nmethod->isAlive()) {
|
||||
VMMethod* method = nmethod->method();
|
||||
if (method == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
jmethodID current_method_id = method->id();
|
||||
if (current_method_id == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If the top frame is a runtime stub, skip it
|
||||
if (num_frames > 0 && frames[0].bci == BCI_NATIVE_FRAME) {
|
||||
frames++;
|
||||
num_frames--;
|
||||
}
|
||||
|
||||
// Mark current_method as COMPILED and frames above current_method as INLINED
|
||||
for (int i = 0; i < num_frames; i++) {
|
||||
if (frames[i].method_id == NULL || frames[i].bci <= BCI_NATIVE_FRAME) {
|
||||
break;
|
||||
}
|
||||
if (frames[i].method_id == current_method_id) {
|
||||
int level = nmethod->level();
|
||||
frames[i].bci = FrameType::encode(level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED, frames[i].bci);
|
||||
for (int j = 0; j < i; j++) {
|
||||
frames[j].bci = FrameType::encode(FRAME_INLINED, frames[j].bci);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (nmethod->isInterpreter()) {
|
||||
// Mark the first Java frame as INTERPRETED
|
||||
for (int i = 0; i < num_frames; i++) {
|
||||
if (frames[i].bci > BCI_NATIVE_FRAME) {
|
||||
frames[i].bci = FrameType::encode(FRAME_INTERPRETED, frames[i].bci);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Event* event) {
|
||||
atomicInc(_total_samples);
|
||||
|
||||
@@ -637,13 +423,13 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
|
||||
}
|
||||
}
|
||||
|
||||
StackContext java_ctx = {0};
|
||||
u64 cpu = 0;
|
||||
if (hasNativeStack(event_type)) {
|
||||
if (_features.pc_addr && event_type <= WALL_CLOCK_SAMPLE) {
|
||||
num_frames += makeFrame(frames + num_frames, BCI_ADDRESS, StackFrame(ucontext).pc());
|
||||
}
|
||||
if (_cstack != CSTACK_NO) {
|
||||
num_frames += getNativeTrace(ucontext, frames + num_frames, event_type, tid, &java_ctx);
|
||||
num_frames += getNativeTrace(ucontext, frames + num_frames, event_type, tid, &cpu);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -653,21 +439,14 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
|
||||
if (_cstack == CSTACK_VM) {
|
||||
num_frames += StackWalker::walkVM(ucontext, frames + num_frames, _max_stack_depth, lock_index, _features, event_type);
|
||||
} else {
|
||||
int java_frames = getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
|
||||
if (java_frames > 0 && java_ctx.pc != NULL && VMStructs::hasMethodStructs()) {
|
||||
NMethod* nmethod = CodeHeap::findNMethod(java_ctx.pc);
|
||||
if (nmethod != NULL) {
|
||||
fillFrameTypes(frames + num_frames, java_frames, nmethod);
|
||||
}
|
||||
}
|
||||
num_frames += java_frames;
|
||||
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
|
||||
}
|
||||
} else if (event_type >= ALLOC_SAMPLE && event_type <= ALLOC_OUTSIDE_TLAB && _alloc_engine == &alloc_tracer) {
|
||||
if (VMStructs::hasStackStructs()) {
|
||||
StackWalkFeatures no_features{};
|
||||
num_frames += StackWalker::walkVM(ucontext, frames + num_frames, _max_stack_depth, lock_index, no_features, event_type);
|
||||
} else {
|
||||
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
|
||||
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
|
||||
}
|
||||
} else {
|
||||
// Lock events and instrumentation events can safely call synchronous JVM TI stack walker.
|
||||
@@ -678,6 +457,9 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
|
||||
|
||||
if (num_frames == 0) {
|
||||
num_frames += makeFrame(frames + num_frames, BCI_ERROR, "no_Java_frame");
|
||||
} else if (num_frames >= _max_stack_depth && _truncated_stack_depth < _max_stack_depth) {
|
||||
num_frames = _truncated_stack_depth;
|
||||
num_frames += makeFrame(frames + num_frames, BCI_ERROR, "truncated");
|
||||
}
|
||||
|
||||
if (_add_thread_frame) {
|
||||
@@ -687,7 +469,7 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
|
||||
num_frames += makeFrame(frames + num_frames, BCI_ERROR, OS::schedPolicy(0));
|
||||
}
|
||||
if (_add_cpu_frame && event_type == PERF_SAMPLE) {
|
||||
num_frames += makeFrame(frames + num_frames, BCI_CPU, java_ctx.cpu | 0x8000);
|
||||
num_frames += makeFrame(frames + num_frames, BCI_CPU, cpu | 0x8000);
|
||||
}
|
||||
|
||||
if (stack_walk_begin != 0) {
|
||||
@@ -794,7 +576,7 @@ void* Profiler::dlopen_hook(const char* filename, int flags) {
|
||||
void Profiler::switchLibraryTrap(bool enable) {
|
||||
if (_dlopen_entry != NULL) {
|
||||
void* impl = enable ? (void*)dlopen_hook : (void*)dlopen;
|
||||
__atomic_store_n(_dlopen_entry, impl, __ATOMIC_RELEASE);
|
||||
storeRelease(*_dlopen_entry, impl);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -869,11 +651,6 @@ void Profiler::crashHandler(int signo, siginfo_t* siginfo, void* ucontext) {
|
||||
StackWalker::checkFault();
|
||||
}
|
||||
|
||||
// Workaround for JDK-8313796. Setting cstack=dwarf also helps
|
||||
if (VMStructs::isInterpretedFrameValidFunc((const void*)pc) && frame.skipFaultInstruction()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (WX_MEMORY && Trap::isFaultInstruction(pc)) {
|
||||
return;
|
||||
}
|
||||
@@ -966,11 +743,13 @@ void Profiler::updateNativeThreadNames() {
|
||||
}
|
||||
}
|
||||
|
||||
Engine* Profiler::selectEngine(const char* event_name) {
|
||||
Engine* Profiler::selectEngine(Arguments& args) {
|
||||
const char* event_name = args._event;
|
||||
|
||||
if (event_name == NULL) {
|
||||
return &noop_engine;
|
||||
} else if (strcmp(event_name, EVENT_CPU) == 0) {
|
||||
if (FdTransferClient::hasPeer() || PerfEvents::supported()) {
|
||||
if (args._record_cpu || args._target_cpu != -1 || FdTransferClient::hasPeer() || PerfEvents::supported()) {
|
||||
return &perf_events;
|
||||
} else if (CTimer::supported()) {
|
||||
return &ctimer;
|
||||
@@ -994,8 +773,8 @@ Engine* Profiler::selectEngine(const char* event_name) {
|
||||
}
|
||||
}
|
||||
|
||||
Engine* Profiler::selectAllocEngine(long alloc_interval, bool live) {
|
||||
if (VM::addSampleObjectsCapability()) {
|
||||
Engine* Profiler::selectAllocEngine(bool tlab) {
|
||||
if (!tlab && VM::addSampleObjectsCapability()) {
|
||||
return &object_sampler;
|
||||
} else if (VM::isOpenJ9()) {
|
||||
return &j9_object_sampler;
|
||||
@@ -1105,7 +884,7 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
lockAll();
|
||||
_class_map.clear();
|
||||
_thread_filter.clear();
|
||||
_call_trace_storage.clear();
|
||||
_call_trace_storage.clear(args._mem_limit);
|
||||
// Make sure frame structure is consistent throughout the entire recording
|
||||
_add_event_frame = args._output != OUTPUT_JFR;
|
||||
_add_thread_frame = args._threads && args._output != OUTPUT_JFR;
|
||||
@@ -1133,12 +912,9 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
}
|
||||
}
|
||||
}
|
||||
_truncated_stack_depth = std::min(std::max(args._truncated_stack_depth, 0), _max_stack_depth);
|
||||
|
||||
_features = args._features;
|
||||
if (VM::hotspot_version() < 8) {
|
||||
_features.java_anchor = 0;
|
||||
_features.gc_traces = 0;
|
||||
}
|
||||
if (!VMStructs::hasClassNames()) {
|
||||
_features.vtable_target = 0;
|
||||
}
|
||||
@@ -1149,7 +925,7 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
_update_thread_names = args._threads || args._output == OUTPUT_JFR;
|
||||
_thread_filter.init(args._filter);
|
||||
|
||||
_engine = selectEngine(args._event);
|
||||
_engine = selectEngine(args);
|
||||
if (_engine == &wall_clock && args._wall >= 0) {
|
||||
return Error("Cannot start wall clock with the selected event");
|
||||
} else if (_engine != &perf_events && args._target_cpu != -1) {
|
||||
@@ -1163,23 +939,21 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
_cstack = args._cstack;
|
||||
if (_cstack == CSTACK_DWARF && !DWARF_SUPPORTED) {
|
||||
return Error("DWARF unwinding is not supported on this platform");
|
||||
} else if (_cstack == CSTACK_LBR && _engine != &perf_events) {
|
||||
return Error("Branch stack is supported only with PMU events");
|
||||
} else if (_cstack == CSTACK_VM && VM::loaded() && !VMStructs::hasStackStructs()) {
|
||||
return Error("VMStructs stack walking is not supported on this JVM/platform");
|
||||
}
|
||||
|
||||
if (_cstack == CSTACK_DEFAULT) {
|
||||
if (VMStructs::hasStackStructs()) {
|
||||
// Use VMStructs by default when possible
|
||||
_cstack = args._cstack = CSTACK_VM;
|
||||
} else if (VM::isOpenJ9() && DWARF_SUPPORTED) {
|
||||
// OpenJ9 libs are compiled with frame pointers omitted
|
||||
_cstack = args._cstack = CSTACK_DWARF;
|
||||
}
|
||||
if ((_cstack == CSTACK_DEFAULT || _cstack == CSTACK_DWARF) && VMStructs::hasStackStructs() && !_features.agct) {
|
||||
// Use VMStructs by default when possible
|
||||
_cstack = args._cstack = CSTACK_VM;
|
||||
} else if (_cstack == CSTACK_DEFAULT && VM::isOpenJ9() && DWARF_SUPPORTED) {
|
||||
// OpenJ9 libs are compiled with frame pointers omitted
|
||||
_cstack = args._cstack = CSTACK_DWARF;
|
||||
}
|
||||
|
||||
if (_cstack != CSTACK_VM && _features.mixed) {
|
||||
if (_cstack == CSTACK_VM && _features.agct) {
|
||||
return Error("agct feature is incompatible with cstack=vm");
|
||||
} else if (_cstack != CSTACK_VM && _features.mixed) {
|
||||
return Error("mixed feature is only allowed with VMStructs stack walking");
|
||||
}
|
||||
|
||||
@@ -1207,7 +981,7 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
}
|
||||
|
||||
if (_event_mask & EM_ALLOC) {
|
||||
_alloc_engine = selectAllocEngine(args._alloc, args._live);
|
||||
_alloc_engine = selectAllocEngine(args._tlab);
|
||||
error = _alloc_engine->start(args);
|
||||
if (error) {
|
||||
goto error2;
|
||||
@@ -1331,11 +1105,6 @@ Error Profiler::stop(bool restart) {
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
Error Profiler::check(Arguments& args) {
|
||||
Log::warn("The 'check' command is deprecated and will be removed in the next release");
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
Error Profiler::flushJfr() {
|
||||
MutexLocker ml(_state_lock);
|
||||
if (_state != RUNNING) {
|
||||
@@ -1791,14 +1560,6 @@ Error Profiler::runInternal(Arguments& args, Writer& out) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTION_CHECK: {
|
||||
Error error = check(args);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
out << "OK\n";
|
||||
break;
|
||||
}
|
||||
case ACTION_STATUS: {
|
||||
MutexLocker ml(_state_lock);
|
||||
if (_state == RUNNING) {
|
||||
|
||||
@@ -80,6 +80,7 @@ class Profiler {
|
||||
SpinLock _locks[CONCURRENCY_LEVEL];
|
||||
CallTraceBuffer* _calltrace_buffer[CONCURRENCY_LEVEL];
|
||||
int _max_stack_depth;
|
||||
int _truncated_stack_depth;
|
||||
StackWalkFeatures _features;
|
||||
CStack _cstack;
|
||||
bool _add_event_frame;
|
||||
@@ -92,8 +93,6 @@ class Profiler {
|
||||
SpinLock _stubs_lock;
|
||||
CodeCache _runtime_stubs;
|
||||
CodeCacheArray _native_libs;
|
||||
const void* _call_stub_begin;
|
||||
const void* _call_stub_end;
|
||||
|
||||
// dlopen() hook support
|
||||
void** _dlopen_entry;
|
||||
@@ -112,18 +111,16 @@ class Profiler {
|
||||
|
||||
const char* asgctError(int code);
|
||||
u32 getLockIndex(int tid);
|
||||
jmethodID getCurrentCompileTask();
|
||||
int getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType event_type, int tid, StackContext* java_ctx);
|
||||
int getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackContext* java_ctx);
|
||||
int getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType event_type, int tid, u64* cpu);
|
||||
int getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth);
|
||||
int getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int start_depth, int max_depth);
|
||||
void fillFrameTypes(ASGCT_CallFrame* frames, int num_frames, NMethod* nmethod);
|
||||
void setThreadInfo(int tid, const char* name, jlong java_thread_id);
|
||||
void updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread);
|
||||
void updateJavaThreadNames();
|
||||
void updateNativeThreadNames();
|
||||
void mangle(const char* name, char* buf, size_t size);
|
||||
Engine* selectEngine(const char* event_name);
|
||||
Engine* selectAllocEngine(long alloc_interval, bool live);
|
||||
Engine* selectEngine(Arguments& args);
|
||||
Engine* selectAllocEngine(bool tlab);
|
||||
Engine* activeEngine();
|
||||
Error checkJvmCapabilities();
|
||||
|
||||
@@ -166,12 +163,11 @@ class Profiler {
|
||||
_gc_id(0),
|
||||
_timer_id(NULL),
|
||||
_max_stack_depth(0),
|
||||
_truncated_stack_depth(0),
|
||||
_thread_events_state(JVMTI_DISABLE),
|
||||
_stubs_lock(),
|
||||
_runtime_stubs("[stubs]"),
|
||||
_native_libs(),
|
||||
_call_stub_begin(NULL),
|
||||
_call_stub_end(NULL),
|
||||
_dlopen_entry(NULL) {
|
||||
|
||||
for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
|
||||
@@ -194,7 +190,6 @@ class Profiler {
|
||||
Error runInternal(Arguments& args, Writer& out);
|
||||
Error expire(Arguments& args, bool restart);
|
||||
void shutdown(Arguments& args);
|
||||
Error check(Arguments& args);
|
||||
Error start(Arguments& args, bool reset);
|
||||
Error stop(bool restart = false);
|
||||
Error flushJfr();
|
||||
@@ -219,7 +214,6 @@ class Profiler {
|
||||
CodeCache* findLibraryByAddress(const void* address);
|
||||
const char* findNativeMethod(const void* address);
|
||||
CodeBlob* findRuntimeStub(const void* address);
|
||||
bool isAddressInCode(const void* pc);
|
||||
|
||||
void trapHandler(int signo, siginfo_t* siginfo, void* ucontext);
|
||||
static void crashHandler(int signo, siginfo_t* siginfo, void* ucontext);
|
||||
|
||||
@@ -3,34 +3,47 @@
|
||||
<head>
|
||||
<meta charset='utf-8'>
|
||||
<style>
|
||||
body {margin: 0; padding: 10px 10px 22px 10px; background-color: #ffffff}
|
||||
:root {--bg: #ffffff; --fg: #000000; --hl-bg: #ffffe0; --hl-border: #ffc000; --link: #0366d6; --legend-bg: #ffffe0; --legend-border: #666666}
|
||||
:root.dark {--bg: #1e1e1e; --fg: #cccccc; --hl-bg: #3a3a00; --hl-border: #8a7000; --link: #58a6ff; --legend-bg: #333333; --legend-border: #888888}
|
||||
body {margin: 0; padding: 10px 10px 22px 10px; background-color: var(--bg); color: var(--fg)}
|
||||
h1 {margin: 5px 0 0 0; font-size: 18px; font-weight: normal; text-align: center}
|
||||
header {margin: -22px 0 6px 0}
|
||||
button {border: none; background: none; width: 24px; height: 24px; cursor: pointer; margin: 0; padding: 2px 0 0 0; text-align: center}
|
||||
button:hover {background-color: #ffffe0; outline: 1px solid #ffc000; border-radius: 4px}
|
||||
button:hover {background-color: var(--hl-bg); outline: 1px solid var(--hl-border); border-radius: 4px}
|
||||
dl {margin: 0 4px 8px 4px}
|
||||
dt {margin: 1px; padding: 2px 0; font-weight: bold}
|
||||
dd {margin: 1px; padding: 2px 4px}
|
||||
dl.frames {float: left; width: 160px}
|
||||
dl.hotkeys {clear: left; border-top: 1px solid #666666}
|
||||
dl.frames > dd {color: #000000}
|
||||
dl.hotkeys {clear: left; border-top: 1px solid var(--legend-border)}
|
||||
dl.hotkeys > dt {float: left; clear: left; width: 158px; margin-right: 4px; text-align: right}
|
||||
dl.hotkeys > dd {float: left}
|
||||
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid #ffc000; display: none; overflow: hidden; white-space: nowrap; background-color: #ffffe0}
|
||||
a {color: #0366d6}
|
||||
#legend {padding: 4px; border-radius: 4px; background: #ffffe0; border: 1px solid #666666; display: none}
|
||||
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: #ffffe0; outline: 1px solid #ffc000; height: 15px}
|
||||
p {position: fixed; bottom: 0; margin: 0; padding: 2px 3px 2px 3px; outline: 1px solid var(--hl-border); display: none; overflow: hidden; white-space: nowrap; background-color: var(--hl-bg); color: var(--fg)}
|
||||
a {color: var(--link)}
|
||||
#legend {padding: 4px; border-radius: 4px; background: var(--legend-bg); border: 1px solid var(--legend-border); display: none}
|
||||
#hl {position: absolute; display: none; overflow: hidden; white-space: nowrap; pointer-events: none; background-color: var(--hl-bg); outline: 1px solid var(--hl-border); height: 15px}
|
||||
#hl span {padding: 0 3px 0 3px}
|
||||
#status {left: 0}
|
||||
#match {right: 0}
|
||||
#reset {cursor: pointer}
|
||||
#canvas {width: 100%; height: /*height:*/300px}
|
||||
</style>
|
||||
<script>
|
||||
{
|
||||
let theme;
|
||||
try { theme = localStorage.getItem('flame-theme'); } catch (ignored) {}
|
||||
if (theme ? theme === 'dark' : matchMedia('(prefers-color-scheme: dark)').matches) {
|
||||
document.documentElement.classList.add('dark');
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body style='font: 12px Verdana, sans-serif'>
|
||||
<h1>/*title:*/</h1>
|
||||
<header style='float: left'>
|
||||
<button id='inverted' title='Invert (I)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 392 392'><path d='M196,36 L316,156 L76,156 Z' fill='#004d80'/><path d='M196,356 L76,236 L316,236 Z' fill='#004d80'/><path d='M196,54 L298,156 L94,156 Z' fill='#ff8d40'/><path d='M196,338 L94,236 L298,236 Z' fill='#40b2ff'/><rect x='94' y='188' width='204' height='16' fill='#004d80'/></svg></button>
|
||||
<button id='search' title='Search (Ctrl+F)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='-39.3 -39.3 471.1 471.1'><circle cx='147.7' cy='147.8' r='125.9' fill='#fff'/><path fill='#40b2ff' d='M370.7 348.7c0 1.4-1.6 6.3-7.2 12.3-6.2 6.7-12.5 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6z'/><path fill='#ff8d40' d='M208.7 86.9l-14.5 14.5c-17.1 17.1-46.5 5-46.5-19.3V61.6c-49 0-88.4 40.8-86.1 90.2 2 43.9 38.1 80 82 82 49.5 2.3 90.2-37.2 90.2-86.1 0-23.7-9.6-45.2-25.1-60.8z'/><path fill='#004d80' d='M276.1 221c12.3-21.5 19.5-46.5 19.5-73.2C295.6 66.3 229.2.1 147.7.1S0 66.3 0 147.9s66.3 147.7 147.7 147.7c26.6 0 51.5-7.1 73.2-19.5 39.8 53.3 91.9 113.5 126.1 116.4 12.3.5 22.9-6.7 32.8-16.7 5.2-5.6 13.8-16.9 12.8-28.8-2.9-34.1-63.1-86.2-116.4-126.1zM147.7 273.8c-69.5 0-125.9-56.5-125.9-125.9S78.3 21.9 147.7 21.9 273.6 78.4 273.6 147.8s-56.4 126-125.9 126zm215.9 87.2c-6.2 6.7-12.4 9.8-14.7 9.8h-.1c-19.5-1.6-62-43.2-109.6-106.8 9.2-7.2 17.5-15.5 24.6-24.6 63.6 47.6 105.2 90.2 106.8 109.6 0 1.4-1.6 6.3-7.2 12.4z'/></svg></button>
|
||||
<button id='darkmode' title='Toggle dark mode (D)'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><path d='M10 4a6 6 0 0 1 0 12z' fill='#ff8d40'/><path d='M10 4a6 6 0 0 0 0 12z' fill='#ffffff'/><circle cx='10' cy='10' r='8' fill='none' stroke='#004d80'/></svg></button>
|
||||
<button id='info'><svg xmlns='http://www.w3.org/2000/svg' width='20' height='20' viewBox='0 0 20 20'><circle cx='10' cy='10' r='8' stroke='#004d80' fill='none'/><path d='M10 5.5c-1.25 0-2.25 1-2.25 2.25H9a1.25 1.25 0 0 1 2.5 0c0 .65-.55 1-1 1.2-.7.35-1.25.85-1.25 1.8V11h1.5v-.25c0-.37.29-.65.68-.83.73-.34 1.32-.87 1.32-2.17 0-1.25-1.5-2.25-2.75-2.25' fill='#ff8d40' stroke='#ff8d40' stroke-width='.6' stroke-linecap='round' stroke-linejoin='round'/><circle cx='10' cy='13.5' r='1.2' fill='#ff8d40'/></svg></button>
|
||||
</header>
|
||||
<header style='float: right'>Produced by <a href='https://github.com/async-profiler/async-profiler'>async-profiler</a></header>
|
||||
@@ -57,7 +70,7 @@
|
||||
</dl>
|
||||
<dl class='hotkeys'>
|
||||
<dt>Click frame</dt><dd>Zoom into frame</dd>
|
||||
<dt>Alt+Click</dt><dd>Remove stack</dd>
|
||||
<dt>Ctrl/Alt+Click</dt><dd>Remove stack</dd>
|
||||
<dt>0</dt><dd>Reset zoom</dd>
|
||||
<dt>I</dt><dd>Invert graph</dd>
|
||||
<dt>Ctrl+F</dt><dd>Search</dd>
|
||||
@@ -75,9 +88,11 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
'use strict';
|
||||
let root, px, pattern;
|
||||
let level0 = 0, left0 = 0, width0 = 0;
|
||||
let level0 = 0, left0 = 0, width0 = 0, d = 0;
|
||||
let nav = [], navIndex, matchval;
|
||||
let inverted = /*inverted:*/false;
|
||||
const U = undefined;
|
||||
const maxdiff = /*maxdiff:*/-1;
|
||||
const levels = Array(/*depth:*/0);
|
||||
for (let h = 0; h < levels.length; h++) {
|
||||
levels[h] = [];
|
||||
@@ -111,10 +126,18 @@
|
||||
return '#' + (p[0] + ((p[1] * v) << 16 | (p[2] * v) << 8 | (p[3] * v))).toString(16);
|
||||
}
|
||||
|
||||
function getDiffColor(diff) {
|
||||
if (diff === U) return '#ffdd33';
|
||||
if (diff === 0) return '#e0e0e0';
|
||||
const v = Math.round(128 * (maxdiff - Math.abs(diff)) / maxdiff) + 96;
|
||||
return diff > 0 ? 'rgb(255,' + v + ',' + v + ')' : 'rgb(' + v + ',' + v + ',255)';
|
||||
}
|
||||
|
||||
function f(key, level, left, width, inln, c1, int) {
|
||||
levels[level0 = level].push({level, left: left0 += left, width: width0 = width || width0,
|
||||
color: getColor(palette[key & 7]), title: cpool[key >>> 3],
|
||||
details: (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
|
||||
color: maxdiff >= 0 ? getDiffColor(d) : getColor(palette[key & 7]),
|
||||
title: cpool[key >>> 3],
|
||||
details: (d ? (d > 0 ? ', +' : ', ') + d : '') + (int ? ', int=' + int : '') + (c1 ? ', c1=' + c1 : '') + (inln ? ', inln=' + inln : '')
|
||||
});
|
||||
}
|
||||
|
||||
@@ -186,8 +209,10 @@
|
||||
}
|
||||
|
||||
function render(newRoot, nav) {
|
||||
const bg = getComputedStyle(document.documentElement).getPropertyValue('--bg');
|
||||
|
||||
if (root) {
|
||||
c.fillStyle = '#ffffff';
|
||||
c.fillStyle = bg;
|
||||
c.fillRect(0, 0, canvasWidth, canvasHeight);
|
||||
}
|
||||
|
||||
@@ -229,7 +254,7 @@
|
||||
}
|
||||
|
||||
if (f.level < root.level) {
|
||||
c.fillStyle = 'rgba(255, 255, 255, 0.5)';
|
||||
c.fillStyle = bg + '80';
|
||||
c.fillRect((f.left - x0) * px, y, f.width * px, 15);
|
||||
}
|
||||
}
|
||||
@@ -266,7 +291,7 @@
|
||||
canvas.title = f.title + '\n(' + samples(f.width) + f.details + ', ' + pct(f.width, levels[0][0].width) + '%)';
|
||||
canvas.style.cursor = 'pointer';
|
||||
canvas.onclick = function() {
|
||||
if (event.altKey && h >= root.level && h > 0) {
|
||||
if ((event.altKey || event.ctrlKey) && h >= root.level && h > 0) {
|
||||
removeStack(f.left, f.width);
|
||||
root.width > f.width ? render(root) : render();
|
||||
} else if (f !== root) {
|
||||
@@ -307,6 +332,12 @@
|
||||
search(false);
|
||||
}
|
||||
|
||||
document.getElementById('darkmode').onclick = function() {
|
||||
const theme = document.documentElement.classList.toggle('dark') ? 'dark' : 'light';
|
||||
try { localStorage.setItem('flame-theme', theme); } catch (ignored) {}
|
||||
render(root);
|
||||
}
|
||||
|
||||
const btnInfo = document.getElementById('info');
|
||||
const legend = document.getElementById('legend');
|
||||
|
||||
@@ -338,6 +369,9 @@
|
||||
canvas.onmouseout();
|
||||
document.getElementById('inverted').onclick();
|
||||
return false;
|
||||
} else if (event.key === 'd') {
|
||||
document.getElementById('darkmode').onclick();
|
||||
return false;
|
||||
} else if (event.key === '0') {
|
||||
canvas.onmouseout();
|
||||
root = levels[0][0];
|
||||
|
||||
@@ -28,14 +28,6 @@ class StackFrame {
|
||||
_ucontext = (ucontext_t*)ucontext;
|
||||
}
|
||||
|
||||
void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) {
|
||||
if (_ucontext != nullptr) {
|
||||
pc() = saved_pc;
|
||||
sp() = saved_sp;
|
||||
fp() = saved_fp;
|
||||
}
|
||||
}
|
||||
|
||||
uintptr_t stackAt(int slot) {
|
||||
return ((uintptr_t*)sp())[slot];
|
||||
}
|
||||
@@ -56,27 +48,14 @@ class StackFrame {
|
||||
|
||||
void ret();
|
||||
|
||||
bool unwindStub(instruction_t* entry, const char* name) {
|
||||
return unwindStub(entry, name, pc(), sp(), fp());
|
||||
}
|
||||
|
||||
bool unwindCompiled(NMethod* nm) {
|
||||
return unwindCompiled(nm, pc(), sp(), fp());
|
||||
}
|
||||
|
||||
bool unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp);
|
||||
bool unwindAtomicStub(const void*& pc);
|
||||
|
||||
// TODO: this function will be removed once `vm` becomes the default stack walking mode
|
||||
bool unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp);
|
||||
|
||||
bool unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp);
|
||||
bool unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp);
|
||||
|
||||
void adjustSP(const void* entry, const void* pc, uintptr_t& sp);
|
||||
|
||||
bool skipFaultInstruction();
|
||||
|
||||
bool checkInterruptedSyscall();
|
||||
|
||||
// Check if PC points to a syscall instruction
|
||||
|
||||
@@ -83,13 +83,13 @@ static inline bool isFixedSizeFrame(const char* name) {
|
||||
// Dispatch by the first character to optimize lookup
|
||||
switch (name[0]) {
|
||||
case 'i':
|
||||
return strncmp(name, "indexof_linear_", 15) == 0;
|
||||
return startsWith(name, "indexof_linear_");
|
||||
case 'm':
|
||||
return strncmp(name, "md5_implCompress", 16) == 0;
|
||||
return startsWith(name, "md5_implCompress");
|
||||
case 's':
|
||||
return strncmp(name, "sha256_implCompress", 19) == 0
|
||||
|| strncmp(name, "string_indexof_linear_", 22) == 0
|
||||
|| strncmp(name, "slow_subtype_check", 18) == 0;
|
||||
return startsWith(name, "sha256_implCompress")
|
||||
|| startsWith(name, "string_indexof_linear_")
|
||||
|| startsWith(name, "slow_subtype_check");
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -100,43 +100,43 @@ static inline bool isZeroSizeFrame(const char* name) {
|
||||
// Dispatch by the first character to optimize lookup
|
||||
switch (name[0]) {
|
||||
case 'I':
|
||||
return strcmp(name, "InlineCacheBuffer") == 0;
|
||||
return streq(name, "InlineCacheBuffer");
|
||||
case 'S':
|
||||
return strncmp(name, "SafeFetch", 9) == 0;
|
||||
return startsWith(name, "SafeFetch");
|
||||
case 'a':
|
||||
return strncmp(name, "atomic", 6) == 0;
|
||||
return startsWith(name, "atomic");
|
||||
case 'b':
|
||||
return strncmp(name, "bigInteger", 10) == 0
|
||||
|| strcmp(name, "base64_encodeBlock") == 0;
|
||||
return startsWith(name, "bigInteger")
|
||||
|| startsWith(name, "base64_encodeBlock")
|
||||
|| streq(name, "backward_copy_longs");
|
||||
case 'c':
|
||||
return strncmp(name, "copy_", 5) == 0
|
||||
|| strncmp(name, "compare_long_string_", 20) == 0;
|
||||
return startsWith(name, "copy_")
|
||||
|| startsWith(name, "compare_long_string_");
|
||||
case 'e':
|
||||
return strcmp(name, "encodeBlock") == 0;
|
||||
return streq(name, "encodeBlock");
|
||||
case 'f':
|
||||
return strcmp(name, "f2hf") == 0;
|
||||
return startsWith(name, "f2hf")
|
||||
|| streq(name, "forward_copy_longs")
|
||||
|| streq(name, "foward_copy_longs"); // there is a typo in JDK 8
|
||||
case 'g':
|
||||
return strcmp(name, "ghash_processBlocks") == 0;
|
||||
return startsWith(name, "ghash_processBlocks") && strchr(name + 19, 'w') == NULL;
|
||||
case 'h':
|
||||
return strcmp(name, "hf2f") == 0;
|
||||
return startsWith(name, "hf2f");
|
||||
case 'i':
|
||||
return strncmp(name, "itable", 6) == 0;
|
||||
return startsWith(name, "itable");
|
||||
case 'l':
|
||||
return strcmp(name, "large_byte_array_inflate") == 0
|
||||
|| strncmp(name, "lookup_secondary_supers_", 24) == 0;
|
||||
return startsWith(name, "large_byte_array_inflate")
|
||||
|| startsWith(name, "lookup_secondary_supers_");
|
||||
case 'm':
|
||||
return strncmp(name, "md5_implCompress", 16) == 0;
|
||||
return startsWith(name, "md5_implCompress");
|
||||
case 's':
|
||||
return strncmp(name, "sha1_implCompress", 17) == 0
|
||||
|| strncmp(name, "compare_long_string_same_encoding", 33) == 0
|
||||
|| strcmp(name, "compare_long_string_LL") == 0
|
||||
|| strcmp(name, "compare_long_string_UU") == 0;
|
||||
return startsWith(name, "sha1_implCompress");
|
||||
case 'u':
|
||||
return strcmp(name, "updateBytesAdler32") == 0;
|
||||
return startsWith(name, "updateBytesAdler32");
|
||||
case 'v':
|
||||
return strncmp(name, "vtable", 6) == 0;
|
||||
return startsWith(name, "vtable");
|
||||
case 'z':
|
||||
return strncmp(name, "zero_", 5) == 0;
|
||||
return startsWith(name, "zero_");
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -172,58 +172,10 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
// Should be done after isSTP check, since frame size may vary between JVM versions
|
||||
pc = link();
|
||||
return true;
|
||||
} else if (strcmp(name, "forward_copy_longs") == 0
|
||||
|| strcmp(name, "backward_copy_longs") == 0
|
||||
// There is a typo in JDK 8
|
||||
|| strcmp(name, "foward_copy_longs") == 0) {
|
||||
// These are called from arraycopy stub that maintains the regular frame link
|
||||
if (&pc == &this->pc() && withinCurrentStack(fp)) {
|
||||
// Unwind both stub frames for AsyncGetCallTrace
|
||||
sp = fp + 16;
|
||||
fp = ((uintptr_t*)sp)[-2];
|
||||
pc = ((uintptr_t*)sp)[-1] - sizeof(instruction_t);
|
||||
} else {
|
||||
// When cstack=vm, unwind stub frames one by one
|
||||
pc = link();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool isEntryBarrier(instruction_t* ip) {
|
||||
// ldr w9, [x28, #32]
|
||||
// cmp x8, x9
|
||||
return ip[0] == 0xb9402389 && ip[1] == 0xeb09011f;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
if ((*ip & 0xffe07fff) == 0xa9007bfd) {
|
||||
// stp x29, x30, [sp, #offset]
|
||||
// SP has been adjusted, but FP not yet stored in a new frame
|
||||
unsigned int offset = (*ip >> 12) & 0x1f8;
|
||||
sp += offset + 16;
|
||||
pc = link();
|
||||
} else if (ip > entry && ip[0] == 0x910003fd && ip[-1] == 0xa9bf7bfd) {
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
sp += 16;
|
||||
pc = ((uintptr_t*)sp)[-1];
|
||||
} else if (ip > entry + 3 && !nm->isFrameCompleteAt(ip) &&
|
||||
(isEntryBarrier(ip) || isEntryBarrier(ip + 1))) {
|
||||
// Frame should be complete at this point
|
||||
sp += nm->frameSize() * sizeof(void*);
|
||||
fp = ((uintptr_t*)sp)[-2];
|
||||
pc = ((uintptr_t*)sp)[-1];
|
||||
} else {
|
||||
// Just try
|
||||
pc = link();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) {
|
||||
// Frame is fully constructed after sp is decremented by the frame size.
|
||||
// Check if there is such an instruction anywhere between
|
||||
@@ -330,7 +282,7 @@ bool StackFrame::unwindAtomicStub(const void*& pc) {
|
||||
const void* lr = (const void*)link();
|
||||
if (VMStructs::libjvm()->contains(lr)) {
|
||||
NMethod* nm = CodeHeap::findNMethod(pc);
|
||||
if (nm != NULL && strncmp(nm->name(), "Stub", 4) == 0) {
|
||||
if (nm != NULL && startsWith(nm->name(), "Stub")) {
|
||||
pc = lr;
|
||||
return true;
|
||||
}
|
||||
@@ -353,10 +305,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
#ifdef __APPLE__
|
||||
// We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#if defined(__arm__) || defined(__thumb__)
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "stackFrame.h"
|
||||
#include "vmStructs.h"
|
||||
|
||||
@@ -68,9 +67,9 @@ void StackFrame::ret() {
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
if (ip == entry || *ip == 0xe12fff1e
|
||||
|| strncmp(name, "itable", 6) == 0
|
||||
|| strncmp(name, "vtable", 6) == 0
|
||||
|| strcmp(name, "InlineCacheBuffer") == 0)
|
||||
|| startsWith(name, "itable")
|
||||
|| startsWith(name, "vtable")
|
||||
|| streq(name, "InlineCacheBuffer"))
|
||||
{
|
||||
pc = link();
|
||||
return true;
|
||||
@@ -78,29 +77,6 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) {
|
||||
// push {r11, lr}
|
||||
// mov r11, sp (optional)
|
||||
// -> sub sp, sp, #offs
|
||||
fp = ((uintptr_t*)sp)[0];
|
||||
pc = ((uintptr_t*)sp)[1];
|
||||
sp += 8;
|
||||
return true;
|
||||
} else if (*ip == 0xe8bd4800) {
|
||||
// add sp, sp, #offs
|
||||
// -> pop {r11, lr}
|
||||
fp = ((uintptr_t*)sp)[0];
|
||||
pc = ((uintptr_t*)sp)[1];
|
||||
sp += 8;
|
||||
return true;
|
||||
}
|
||||
pc = link();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
@@ -125,10 +101,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not needed
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
return retval() == (uintptr_t)-EINTR;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#ifdef __i386__
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "stackFrame.h"
|
||||
#include "vmStructs.h"
|
||||
|
||||
@@ -70,9 +69,9 @@ void StackFrame::ret() {
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
if (ip == entry || *ip == 0xc3
|
||||
|| strncmp(name, "itable", 6) == 0
|
||||
|| strncmp(name, "vtable", 6) == 0
|
||||
|| strcmp(name, "InlineCacheBuffer") == 0)
|
||||
|| startsWith(name, "itable")
|
||||
|| startsWith(name, "vtable")
|
||||
|| streq(name, "InlineCacheBuffer"))
|
||||
{
|
||||
pc = *(uintptr_t*)sp;
|
||||
sp += 4;
|
||||
@@ -95,27 +94,6 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
if (ip <= entry
|
||||
|| *ip == 0xc3 // ret
|
||||
|| *ip == 0x55 // push ebp
|
||||
|| ip[-1] == 0x5d) // after pop ebp
|
||||
{
|
||||
pc = *(uintptr_t*)sp;
|
||||
sp += 4;
|
||||
return true;
|
||||
} else if (*ip == 0x5d) {
|
||||
// pop ebp
|
||||
fp = ((uintptr_t*)sp)[0];
|
||||
pc = ((uintptr_t*)sp)[1];
|
||||
sp += 8;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
@@ -146,10 +124,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not needed
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
return retval() == (uintptr_t)-EINTR;
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
#ifdef __loongarch_lp64
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include "stackFrame.h"
|
||||
#include "vmStructs.h"
|
||||
|
||||
#define REG(l) _ucontext->uc_mcontext.__gregs[l]
|
||||
|
||||
@@ -67,9 +67,9 @@ void StackFrame::ret() {
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
if (ip == entry
|
||||
|| strncmp(name, "itable", 6) == 0
|
||||
|| strncmp(name, "vtable", 6) == 0
|
||||
|| strcmp(name, "InlineCacheBuffer") == 0)
|
||||
|| startsWith(name, "itable")
|
||||
|| startsWith(name, "vtable")
|
||||
|| streq(name, "InlineCacheBuffer"))
|
||||
{
|
||||
pc = link();
|
||||
return true;
|
||||
@@ -77,11 +77,6 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// Not yet implemented
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// Not yet implemented
|
||||
return false;
|
||||
@@ -101,10 +96,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not yet implemented
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
return retval() == (uintptr_t)-EINTR;
|
||||
}
|
||||
|
||||
@@ -65,68 +65,11 @@ void StackFrame::ret() {
|
||||
pc() = link();
|
||||
}
|
||||
|
||||
static inline bool inC1EpilogueCrit(uintptr_t pc) {
|
||||
if (!(pc & 0xfff)) {
|
||||
// Make sure we are not at the page boundary, so that reading [pc - 1] is safe
|
||||
return false;
|
||||
}
|
||||
// C1 epilogue and critical section (posX)
|
||||
// 3821**** add r1,r1,xx
|
||||
// pos3 xxxxxxxx
|
||||
// pos2 1000e1eb ld r31,16(r1)
|
||||
// pos1 a603e87f mtlr r31
|
||||
// xxxxxxxx
|
||||
// 2000804e blr
|
||||
instruction_t* inst = (instruction_t*)pc;
|
||||
if (inst[ 1] == 0xebe10010 && inst[2] == 0x7fe803a6 ||
|
||||
inst[ 0] == 0xebe10010 && inst[1] == 0x7fe803a6 ||
|
||||
inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // not in critical section
|
||||
}
|
||||
|
||||
static inline bool inC2PrologueCrit(uintptr_t pc) {
|
||||
// C2 prologue and critical section
|
||||
// f821**** stdu r1, (xx)r1
|
||||
// pos1 fa950010 std r20,16(r21)
|
||||
instruction_t* inst = (instruction_t*)pc;
|
||||
if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // not in critical section
|
||||
}
|
||||
|
||||
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
pc = link();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// On PPC there is a valid back link to the previous frame at all times. The callee stores
|
||||
// the return address in the caller's frame before it constructs its own frame. After it
|
||||
// has destroyed its frame it restores the link register and returns. A problematic sequence
|
||||
// is the prologue/epilogue of a compiled method before/after frame construction/destruction.
|
||||
// Therefore popping the frame would not help here, as it is not yet/anymore present, rather
|
||||
// more adjusting the pc to the callers pc does the trick. There are two exceptions to this,
|
||||
// One in the prologue of C2 compiled methods and one in the epilogue of C1 compiled methods.
|
||||
if (inC1EpilogueCrit(pc)) {
|
||||
// lr not yet set: use the value stored in the frame
|
||||
pc = ((uintptr_t*)sp)[2];
|
||||
} else if (inC2PrologueCrit(pc)) {
|
||||
// frame constructed but lr not yet stored in it: just do it here
|
||||
*(((unsigned long *) _ucontext->uc_mcontext.regs->gpr[21]) + 2) = (unsigned long) _ucontext->uc_mcontext.regs->gpr[20];
|
||||
} else {
|
||||
// most probably caller's framer is still on top but pc is already in callee: use caller's pc
|
||||
pc = link();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// Not yet implemented
|
||||
return false;
|
||||
@@ -146,10 +89,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not needed
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
return retval() == (uintptr_t)-EINTR;
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
#if defined(__riscv) && (__riscv_xlen == 64)
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include "stackFrame.h"
|
||||
#include "vmStructs.h"
|
||||
|
||||
#define REG(l) _ucontext->uc_mcontext.__gregs[l]
|
||||
|
||||
@@ -67,9 +67,9 @@ void StackFrame::ret() {
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
if (ip == entry
|
||||
|| strncmp(name, "itable", 6) == 0
|
||||
|| strncmp(name, "vtable", 6) == 0
|
||||
|| strcmp(name, "InlineCacheBuffer") == 0)
|
||||
|| startsWith(name, "itable")
|
||||
|| startsWith(name, "vtable")
|
||||
|| streq(name, "InlineCacheBuffer"))
|
||||
{
|
||||
pc = link();
|
||||
return true;
|
||||
@@ -77,11 +77,6 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// Not yet implemented
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
// Not yet implemented
|
||||
return false;
|
||||
@@ -101,10 +96,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not yet implemented
|
||||
}
|
||||
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
return retval() == (uintptr_t)-EINTR;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#ifdef __x86_64__
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include "stackFrame.h"
|
||||
#include "vmStructs.h"
|
||||
@@ -77,9 +76,9 @@ void StackFrame::ret() {
|
||||
bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
if (ip == entry || *ip == 0xc3
|
||||
|| strncmp(name, "itable", 6) == 0
|
||||
|| strncmp(name, "vtable", 6) == 0
|
||||
|| strcmp(name, "InlineCacheBuffer") == 0)
|
||||
|| startsWith(name, "itable")
|
||||
|| startsWith(name, "vtable")
|
||||
|| streq(name, "InlineCacheBuffer"))
|
||||
{
|
||||
pc = ((uintptr_t*)sp)[0] - 1;
|
||||
sp += 8;
|
||||
@@ -102,48 +101,6 @@ bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& p
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
instruction_t* ip = (instruction_t*)pc;
|
||||
instruction_t* entry = (instruction_t*)nm->entry();
|
||||
if (ip <= entry
|
||||
|| *ip == 0xc3 // ret
|
||||
|| *ip == 0x55 // push rbp
|
||||
|| ip[-1] == 0x5d // after pop rbp
|
||||
|| (ip[0] == 0x41 && ip[1] == 0x85 && ip[2] == 0x02 && ip[3] == 0xc3)) // poll return
|
||||
{
|
||||
// Subtract 1 for PC to point to the call instruction,
|
||||
// otherwise it may be attributed to a wrong bytecode
|
||||
pc = ((uintptr_t*)sp)[0] - 1;
|
||||
sp += 8;
|
||||
return true;
|
||||
} else if (*ip == 0x5d) {
|
||||
// pop rbp
|
||||
fp = ((uintptr_t*)sp)[0];
|
||||
pc = ((uintptr_t*)sp)[1] - 1;
|
||||
sp += 16;
|
||||
return true;
|
||||
} else if (ip <= entry + 15 && ((uintptr_t)ip & 0xfff) && ip[-1] == 0x55) {
|
||||
// push rbp
|
||||
pc = ((uintptr_t*)sp)[1] - 1;
|
||||
sp += 16;
|
||||
return true;
|
||||
} else if (ip <= entry + 7 && ip[0] == 0x48 && ip[1] == 0x89 && ip[2] == 0x6c && ip[3] == 0x24) {
|
||||
// mov [rsp + #off], rbp
|
||||
sp += ip[4] + 16;
|
||||
pc = ((uintptr_t*)sp)[-1] - 1;
|
||||
return true;
|
||||
} else if ((ip[0] == 0x41 && ip[1] == 0x81 && ip[2] == 0x7f && *(u32*)(ip + 4) == 1) ||
|
||||
(ip >= entry + 8 && ip[-8] == 0x41 && ip[-7] == 0x81 && ip[-6] == 0x7f && *(u32*)(ip - 4) == 1)) {
|
||||
// cmp [r15 + #off], 1
|
||||
// nmethod_entry_barrier: frame is fully constructed here
|
||||
sp += nm->frameSize() * sizeof(void*);
|
||||
fp = ((uintptr_t*)sp)[-2];
|
||||
pc = ((uintptr_t*)sp)[-1];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) {
|
||||
// Frame is fully constructed after rsp is decremented by the frame size.
|
||||
// Check if there is such an instruction anywhere between
|
||||
@@ -247,40 +204,6 @@ void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) {
|
||||
// Not needed
|
||||
}
|
||||
|
||||
// Skip failed MOV instruction by writing 0 to destination register
|
||||
bool StackFrame::skipFaultInstruction() {
|
||||
unsigned int insn = *(unsigned int*)pc();
|
||||
if ((insn & 0x80fff8) == 0x008b48) {
|
||||
// mov r64, [r64 + offs]
|
||||
unsigned int reg = ((insn << 1) & 8) | ((insn >> 19) & 7);
|
||||
switch (reg) {
|
||||
case 0x0: REG(RAX, rax) = 0; break;
|
||||
case 0x1: REG(RCX, rcx) = 0; break;
|
||||
case 0x2: REG(RDX, rdx) = 0; break;
|
||||
case 0x3: REG(RBX, rbx) = 0; break;
|
||||
case 0x4: return false; // Do not modify RSP
|
||||
case 0x5: REG(RBP, rbp) = 0; break;
|
||||
case 0x6: REG(RSI, rsi) = 0; break;
|
||||
case 0x7: REG(RDI, rdi) = 0; break;
|
||||
case 0x8: REG(R8 , r8 ) = 0; break;
|
||||
case 0x9: REG(R9 , r9 ) = 0; break;
|
||||
case 0xa: REG(R10, r10) = 0; break;
|
||||
case 0xb: REG(R11, r11) = 0; break;
|
||||
case 0xc: REG(R12, r12) = 0; break;
|
||||
case 0xd: REG(R13, r13) = 0; break;
|
||||
case 0xe: REG(R14, r14) = 0; break;
|
||||
case 0xf: REG(R15, r15) = 0; break;
|
||||
}
|
||||
|
||||
unsigned int insn_size = 3;
|
||||
if ((insn & 0x070000) == 0x040000) insn_size++;
|
||||
if ((insn & 0x400000) == 0x400000) insn_size++;
|
||||
pc() += insn_size;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool StackFrame::checkInterruptedSyscall() {
|
||||
#ifdef __APPLE__
|
||||
// We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap
|
||||
|
||||
@@ -62,7 +62,7 @@ static jmethodID getMethodId(VMMethod* method) {
|
||||
}
|
||||
|
||||
|
||||
int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) {
|
||||
int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth) {
|
||||
const void* pc;
|
||||
uintptr_t fp;
|
||||
uintptr_t sp;
|
||||
@@ -84,7 +84,6 @@ int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth, S
|
||||
// Walk until the bottom of the stack or until the first Java frame
|
||||
while (depth < max_depth) {
|
||||
if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc))) {
|
||||
java_ctx->set(pc, sp, fp);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -112,7 +111,7 @@ int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth, S
|
||||
return depth;
|
||||
}
|
||||
|
||||
int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) {
|
||||
int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth) {
|
||||
const void* pc;
|
||||
uintptr_t fp;
|
||||
uintptr_t sp;
|
||||
@@ -135,9 +134,6 @@ int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth
|
||||
// Walk until the bottom of the stack or until the first Java frame
|
||||
while (depth < max_depth) {
|
||||
if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc))) {
|
||||
// Don't dereference pc as it may point to unreadable memory
|
||||
// frame.adjustSP(page_start, pc, sp);
|
||||
java_ctx->set(pc, sp, fp);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -147,6 +143,7 @@ int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth
|
||||
CodeCache* cc = profiler->findLibraryByAddress(pc);
|
||||
FrameDesc* f = cc != NULL ? cc->findFrameDesc(pc) : &FrameDesc::default_frame;
|
||||
|
||||
retry_unwind_frame:
|
||||
u8 cfa_reg = (u8)f->cfa;
|
||||
int cfa_off = f->cfa >> 8;
|
||||
if (cfa_reg == DW_REG_SP) {
|
||||
@@ -179,9 +176,13 @@ int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth
|
||||
|
||||
if (EMPTY_FRAME_SIZE > 0 || f->pc_off != DW_LINK_REGISTER) {
|
||||
pc = stripPointer(SafeAccess::load((void**)(sp + f->pc_off)));
|
||||
} else if (depth == 1) {
|
||||
pc = (const void*)frame.link();
|
||||
} else {
|
||||
} else if (depth > 1 || (pc = (const void*)frame.link()) == prev_pc) {
|
||||
// Failed to unwind using link register
|
||||
if (f->cfa == DW_REG_SP && fp == sp) {
|
||||
// Special case for vDSO: if an empty frame did not work, try the default frame
|
||||
f = &FrameDesc::default_frame;
|
||||
goto retry_unwind_frame;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -262,6 +263,7 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
}
|
||||
prev_sp = sp;
|
||||
|
||||
CodeCache* native_lib = NULL;
|
||||
if (CodeHeap::contains(pc)) {
|
||||
NMethod* nm = CodeHeap::findNMethod(pc);
|
||||
if (nm == NULL) {
|
||||
@@ -283,44 +285,7 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
anchor = NULL;
|
||||
}
|
||||
|
||||
if (nm->isNMethod()) {
|
||||
int level = nm->level();
|
||||
FrameTypeId type = details && level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED;
|
||||
fillFrame(frames[depth++], type, 0, nm->method()->id());
|
||||
|
||||
if (nm->isFrameCompleteAt(pc)) {
|
||||
if (depth == 1 && frame.unwindEpilogue(nm, (uintptr_t&)pc, sp, fp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int scope_offset = nm->findScopeOffset(pc);
|
||||
if (scope_offset > 0) {
|
||||
depth--;
|
||||
ScopeDesc scope(nm);
|
||||
do {
|
||||
scope_offset = scope.decode(scope_offset);
|
||||
if (details) {
|
||||
type = scope_offset > 0 ? FRAME_INLINED :
|
||||
level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED;
|
||||
}
|
||||
fillFrame(frames[depth++], type, scope.bci(), scope.method()->id());
|
||||
} while (scope_offset > 0 && depth < max_depth);
|
||||
}
|
||||
|
||||
// Handle situations when sp is temporarily changed in the compiled code
|
||||
frame.adjustSP(nm->entry(), pc, sp);
|
||||
|
||||
sp += nm->frameSize() * sizeof(void*);
|
||||
fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1];
|
||||
pc = ((const void**)sp)[-FRAME_PC_SLOT];
|
||||
continue;
|
||||
} else if (frame.unwindPrologue(nm, (uintptr_t&)pc, sp, fp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
fillFrame(frames[depth++], BCI_ERROR, "break_compiled");
|
||||
break;
|
||||
} else if (nm->isInterpreter()) {
|
||||
if (nm->isInterpreter()) {
|
||||
if (vm_thread != NULL && vm_thread->inDeopt()) {
|
||||
fillFrame(frames[depth++], BCI_ERROR, "break_deopt");
|
||||
break;
|
||||
@@ -366,6 +331,43 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
|
||||
fillFrame(frames[depth++], BCI_ERROR, "break_interpreted");
|
||||
break;
|
||||
} else if (nm->isNMethod()) {
|
||||
int level = nm->level();
|
||||
FrameTypeId type = details && level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED;
|
||||
fillFrame(frames[depth++], type, 0, nm->method()->id());
|
||||
|
||||
if (nm->isFrameCompleteAt(pc)) {
|
||||
if (depth == 1 && frame.unwindEpilogue(nm, (uintptr_t&)pc, sp, fp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int scope_offset = nm->findScopeOffset(pc);
|
||||
if (scope_offset > 0) {
|
||||
depth--;
|
||||
ScopeDesc scope(nm);
|
||||
do {
|
||||
scope_offset = scope.decode(scope_offset);
|
||||
if (details) {
|
||||
type = scope_offset > 0 ? FRAME_INLINED :
|
||||
level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED;
|
||||
}
|
||||
fillFrame(frames[depth++], type, scope.bci(), scope.method()->id());
|
||||
} while (scope_offset > 0 && depth < max_depth);
|
||||
}
|
||||
|
||||
// Handle situations when sp is temporarily changed in the compiled code
|
||||
frame.adjustSP(nm->entry(), pc, sp);
|
||||
|
||||
sp += nm->frameSize() * sizeof(void*);
|
||||
fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1];
|
||||
pc = ((const void**)sp)[-FRAME_PC_SLOT];
|
||||
continue;
|
||||
} else if (frame.unwindPrologue(nm, (uintptr_t&)pc, sp, fp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
fillFrame(frames[depth++], BCI_ERROR, "break_compiled");
|
||||
break;
|
||||
} else if (nm->isEntryFrame(pc) && !features.mixed) {
|
||||
JavaFrameAnchor* next_anchor = JavaFrameAnchor::fromEntryFrame(fp);
|
||||
if (next_anchor == NULL) {
|
||||
@@ -395,6 +397,11 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
fillFrame(frames[depth++], BCI_NATIVE_FRAME, name);
|
||||
}
|
||||
|
||||
if (startsWith(name, "cont") || startsWith(name, "Cont ")) {
|
||||
// Walking past virtual thread continuation barriers is not currently supported
|
||||
break;
|
||||
}
|
||||
|
||||
if (frame.unwindStub((instruction_t*)start, name, (uintptr_t&)pc, sp, fp)) {
|
||||
continue;
|
||||
}
|
||||
@@ -407,7 +414,8 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const char* method_name = profiler->findNativeMethod(pc);
|
||||
native_lib = profiler->findLibraryByAddress(pc);
|
||||
const char* method_name = native_lib != NULL ? native_lib->binarySearch(pc) : NULL;
|
||||
char mark;
|
||||
if (method_name != NULL && (mark = NativeFunc::mark(method_name)) != 0) {
|
||||
if (mark == MARK_ASYNC_PROFILER && (event_type == MALLOC_SAMPLE || event_type == NATIVE_LOCK_SAMPLE)) {
|
||||
@@ -425,9 +433,9 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
fillFrame(frames[depth++], BCI_NATIVE_FRAME, method_name);
|
||||
}
|
||||
|
||||
CodeCache* cc = profiler->findLibraryByAddress(pc);
|
||||
FrameDesc* f = cc != NULL ? cc->findFrameDesc(pc) : &FrameDesc::default_frame;
|
||||
FrameDesc* f = native_lib != NULL ? native_lib->findFrameDesc(pc) : &FrameDesc::default_frame;
|
||||
|
||||
retry_unwind_frame:
|
||||
u8 cfa_reg = (u8)f->cfa;
|
||||
int cfa_off = f->cfa >> 8;
|
||||
if (cfa_reg == DW_REG_SP) {
|
||||
@@ -460,9 +468,13 @@ int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
|
||||
|
||||
if (EMPTY_FRAME_SIZE > 0 || f->pc_off != DW_LINK_REGISTER) {
|
||||
pc = stripPointer(*(void**)(sp + f->pc_off));
|
||||
} else if (depth == 1) {
|
||||
pc = (const void*)frame.link();
|
||||
} else {
|
||||
} else if (depth > 1 || (pc = (const void*)frame.link()) == prev_pc) {
|
||||
// Failed to unwind using link register
|
||||
if (f->cfa == DW_REG_SP && fp == sp) {
|
||||
// Special case for vDSO: if an empty frame did not work, try the default frame
|
||||
f = &FrameDesc::default_frame;
|
||||
goto retry_unwind_frame;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -14,23 +14,10 @@
|
||||
|
||||
class JavaFrameAnchor;
|
||||
|
||||
struct StackContext {
|
||||
const void* pc;
|
||||
uintptr_t sp;
|
||||
uintptr_t fp;
|
||||
u64 cpu;
|
||||
|
||||
void set(const void* pc, uintptr_t sp, uintptr_t fp) {
|
||||
this->pc = pc;
|
||||
this->sp = sp;
|
||||
this->fp = fp;
|
||||
}
|
||||
};
|
||||
|
||||
class StackWalker {
|
||||
public:
|
||||
static int walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx);
|
||||
static int walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx);
|
||||
static int walkFP(void* ucontext, const void** callchain, int max_depth);
|
||||
static int walkDwarf(void* ucontext, const void** callchain, int max_depth);
|
||||
static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, int lock_index,
|
||||
StackWalkFeatures features, EventType event_type);
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ void ThreadFilter::remove(int thread_id) {
|
||||
|
||||
u32 bit = 1 << (thread_id & 0x1f);
|
||||
if (__sync_fetch_and_and(&word(b, thread_id), ~bit) & bit) {
|
||||
atomicInc(_size, -1);
|
||||
atomicDec(_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,10 @@
|
||||
const int ARGUMENTS_ERROR = 100;
|
||||
const int COMMAND_ERROR = 200;
|
||||
|
||||
static constexpr int JMETHOD_ID_LIMIT = 1024 * 1024 * 500 / 8; // 500 MiB memory, about 65 million methods
|
||||
static int _jmethod_id_count = 0;
|
||||
static bool _jmethod_id_count_warned = false;
|
||||
|
||||
JavaVM* VM::_vm;
|
||||
jvmtiEnv* VM::_jvmti = NULL;
|
||||
|
||||
@@ -44,62 +48,62 @@ JVM_MemoryFunc VM::_totalMemory;
|
||||
JVM_MemoryFunc VM::_freeMemory;
|
||||
|
||||
static bool isVmRuntimeEntry(const char* blob_name) {
|
||||
return strcmp(blob_name, "_ZNK12MemAllocator8allocateEv") == 0
|
||||
|| strncmp(blob_name, "_Z22post_allocation_notify", 26) == 0
|
||||
|| strncmp(blob_name, "_ZN11OptoRuntime", 16) == 0
|
||||
|| strncmp(blob_name, "_ZN8Runtime1", 12) == 0
|
||||
|| strncmp(blob_name, "_ZN13SharedRuntime", 18) == 0
|
||||
|| strncmp(blob_name, "_ZN18InterpreterRuntime", 23) == 0;
|
||||
return streq(blob_name, "_ZNK12MemAllocator8allocateEv")
|
||||
|| startsWith(blob_name, "_Z22post_allocation_notify")
|
||||
|| startsWith(blob_name, "_ZN11OptoRuntime")
|
||||
|| startsWith(blob_name, "_ZN8Runtime1")
|
||||
|| startsWith(blob_name, "_ZN13SharedRuntime")
|
||||
|| startsWith(blob_name, "_ZN18InterpreterRuntime");
|
||||
}
|
||||
|
||||
static bool isZingRuntimeEntry(const char* blob_name) {
|
||||
return strncmp(blob_name, "_ZN14DolphinRuntime", 19) == 0
|
||||
|| strncmp(blob_name, "_ZN37JvmtiSampledObjectAllocEventCollector", 42) == 0;
|
||||
return startsWith(blob_name, "_ZN14DolphinRuntime")
|
||||
|| startsWith(blob_name, "_ZN37JvmtiSampledObjectAllocEventCollector");
|
||||
}
|
||||
|
||||
static bool isZeroInterpreterMethod(const char* blob_name) {
|
||||
return strncmp(blob_name, "_ZN15ZeroInterpreter", 20) == 0
|
||||
|| strncmp(blob_name, "_ZN19BytecodeInterpreter3run", 28) == 0;
|
||||
return startsWith(blob_name, "_ZN15ZeroInterpreter")
|
||||
|| startsWith(blob_name, "_ZN19BytecodeInterpreter3run");
|
||||
}
|
||||
|
||||
static bool isOpenJ9InterpreterMethod(const char* blob_name) {
|
||||
return strncmp(blob_name, "_ZN32VM_BytecodeInterpreter", 27) == 0
|
||||
|| strncmp(blob_name, "_ZN26VM_BytecodeInterpreter", 27) == 0
|
||||
|| strncmp(blob_name, "bytecodeLoop", 12) == 0
|
||||
|| strcmp(blob_name, "cInterpreter") == 0;
|
||||
return startsWith(blob_name, "_ZN32VM_BytecodeInterpreter")
|
||||
|| startsWith(blob_name, "_ZN26VM_BytecodeInterpreter")
|
||||
|| startsWith(blob_name, "bytecodeLoop")
|
||||
|| streq(blob_name, "cInterpreter");
|
||||
}
|
||||
|
||||
static bool isOpenJ9JitStub(const char* blob_name) {
|
||||
if (strncmp(blob_name, "jit", 3) == 0) {
|
||||
if (startsWith(blob_name, "jit")) {
|
||||
blob_name += 3;
|
||||
return strcmp(blob_name, "NewObject") == 0
|
||||
|| strcmp(blob_name, "NewArray") == 0
|
||||
|| strcmp(blob_name, "ANewArray") == 0
|
||||
|| strcmp(blob_name, "AMultiNewArray") == 0;
|
||||
return streq(blob_name, "NewObject")
|
||||
|| streq(blob_name, "NewArray")
|
||||
|| streq(blob_name, "ANewArray")
|
||||
|| streq(blob_name, "AMultiNewArray");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool isOpenJ9Resolve(const char* blob_name) {
|
||||
return strncmp(blob_name, "resolve", 7) == 0;
|
||||
return startsWith(blob_name, "resolve");
|
||||
}
|
||||
|
||||
static bool isOpenJ9JitAlloc(const char* blob_name) {
|
||||
return strncmp(blob_name, "old_", 4) == 0;
|
||||
return startsWith(blob_name, "old_");
|
||||
}
|
||||
|
||||
static bool isOpenJ9GcAlloc(const char* blob_name) {
|
||||
return strncmp(blob_name, "J9Allocate", 10) == 0;
|
||||
return startsWith(blob_name, "J9Allocate");
|
||||
}
|
||||
|
||||
static bool isOpenJ9JvmtiAlloc(const char* blob_name) {
|
||||
return strcmp(blob_name, "jvmtiHookSampledObjectAlloc") == 0 ||
|
||||
strcmp(blob_name, "jvmtiHookObjectAllocate") == 0;
|
||||
return streq(blob_name, "jvmtiHookSampledObjectAlloc") ||
|
||||
streq(blob_name, "jvmtiHookObjectAllocate");
|
||||
}
|
||||
|
||||
static bool isCompilerEntry(const char* blob_name) {
|
||||
return strncmp(blob_name, "_ZN8Compiler14compile_method", 28) == 0 ||
|
||||
strncmp(blob_name, "_ZN10C2Compiler14compile_method", 31) == 0;
|
||||
return startsWith(blob_name, "_ZN8Compiler14compile_method") ||
|
||||
startsWith(blob_name, "_ZN10C2Compiler14compile_method");
|
||||
}
|
||||
|
||||
static void* resolveMethodId(void** mid) {
|
||||
@@ -364,7 +368,13 @@ void VM::applyPatch(char* func, const char* patch, const char* end_patch) {
|
||||
}
|
||||
}
|
||||
|
||||
void VM::loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass) {
|
||||
void VM::loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass, bool update_count) {
|
||||
if (loadAcquire(_jmethod_id_count) > JMETHOD_ID_LIMIT) {
|
||||
if (__sync_bool_compare_and_swap(&_jmethod_id_count_warned, false, true)) {
|
||||
Log::warn("Total number of generated jmethod-ids exceeds %d, stop generating more", JMETHOD_ID_LIMIT);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (VMStructs::hasClassLoaderData()) {
|
||||
VMKlass* vmklass = VMKlass::fromJavaClass(jni, klass);
|
||||
int method_count = vmklass->methodCount();
|
||||
@@ -383,6 +393,9 @@ void VM::loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass) {
|
||||
jint method_count;
|
||||
jmethodID* methods;
|
||||
if (jvmti->GetClassMethods(klass, &method_count, &methods) == 0) {
|
||||
if (update_count) {
|
||||
atomicInc(_jmethod_id_count, method_count);
|
||||
}
|
||||
jvmti->Deallocate((unsigned char*)methods);
|
||||
}
|
||||
}
|
||||
@@ -433,7 +446,7 @@ jvmtiError VM::RedefineClassesHook(jvmtiEnv* jvmti, jint class_count, const jvmt
|
||||
JNIEnv* env = jni();
|
||||
for (int i = 0; i < class_count; i++) {
|
||||
if (class_definitions[i].klass != NULL) {
|
||||
loadMethodIDs(jvmti, env, class_definitions[i].klass);
|
||||
loadMethodIDs(jvmti, env, class_definitions[i].klass, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -449,7 +462,7 @@ jvmtiError VM::RetransformClassesHook(jvmtiEnv* jvmti, jint class_count, const j
|
||||
JNIEnv* env = jni();
|
||||
for (int i = 0; i < class_count; i++) {
|
||||
if (classes[i] != NULL) {
|
||||
loadMethodIDs(jvmti, env, classes[i]);
|
||||
loadMethodIDs(jvmti, env, classes[i], false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <jvmti.h>
|
||||
#include "arch.h"
|
||||
|
||||
|
||||
enum FrameTypeId {
|
||||
FRAME_INTERPRETED = 0,
|
||||
FRAME_JIT_COMPILED = 1,
|
||||
@@ -114,7 +113,7 @@ class VM {
|
||||
|
||||
static void ready();
|
||||
static void applyPatch(char* func, const char* patch, const char* end_patch);
|
||||
static void loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass);
|
||||
static void loadMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni, jclass klass, bool update_count = true);
|
||||
static void loadAllMethodIDs(jvmtiEnv* jvmti, JNIEnv* jni);
|
||||
static bool hasJvmThreads();
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include "vmStructs.h"
|
||||
#include "vmEntry.h"
|
||||
@@ -105,8 +106,8 @@ int VMStructs::_interpreter_frame_bcp_offset = 0;
|
||||
unsigned char VMStructs::_unsigned5_base = 0;
|
||||
const void** VMStructs::_call_stub_return_addr = NULL;
|
||||
const void* VMStructs::_call_stub_return = NULL;
|
||||
const void* VMStructs::_interpreted_frame_valid_start = NULL;
|
||||
const void* VMStructs::_interpreted_frame_valid_end = NULL;
|
||||
const void* VMStructs::_interpreter_start = NULL;
|
||||
NMethod* VMStructs::_interpreter_nm = NULL;
|
||||
|
||||
jfieldID VMStructs::_eetop;
|
||||
jfieldID VMStructs::_tid;
|
||||
@@ -133,7 +134,6 @@ void VMStructs::init(CodeCache* libjvm) {
|
||||
if (libjvm != NULL) {
|
||||
_libjvm = libjvm;
|
||||
initOffsets();
|
||||
initJvmFunctions();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -479,11 +479,16 @@ void VMStructs::resolveOffsets() {
|
||||
&& _comp_task_offset >= 0
|
||||
&& _comp_method_offset >= 0;
|
||||
|
||||
_has_class_loader_data = _class_loader_data_offset >= 0
|
||||
&& _class_loader_data_next_offset == sizeof(uintptr_t) * 8 + 8
|
||||
&& _methods_offset >= 0
|
||||
&& _klass != NULL
|
||||
&& _lock_func != NULL && _unlock_func != NULL;
|
||||
if (VM::hotspot_version() == 8) {
|
||||
_lock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor28lock_without_safepoint_checkEv");
|
||||
_unlock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor6unlockEv");
|
||||
_has_class_loader_data = _class_loader_data_offset >= 0
|
||||
&& _class_loader_data_next_offset == sizeof(uintptr_t) * 8 + 8
|
||||
&& _methods_offset >= 0
|
||||
&& _klass != NULL
|
||||
&& _lock_func != NULL
|
||||
&& _unlock_func != NULL;
|
||||
}
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
_interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -8 : VM::hotspot_version() == 8 ? -7 : 0;
|
||||
@@ -549,6 +554,9 @@ void VMStructs::resolveOffsets() {
|
||||
_heap_block_used_offset < 0) {
|
||||
memset(_code_heap, 0, sizeof(_code_heap));
|
||||
}
|
||||
if (_interpreter_nm == NULL && _interpreter_start != NULL) {
|
||||
_interpreter_nm = CodeHeap::findNMethod(_interpreter_start);
|
||||
}
|
||||
|
||||
if (_collected_heap_addr != NULL && _collected_heap_reserved_offset >= 0 &&
|
||||
_region_start_offset >= 0 && _region_size_offset >= 0) {
|
||||
@@ -556,21 +564,6 @@ void VMStructs::resolveOffsets() {
|
||||
}
|
||||
}
|
||||
|
||||
void VMStructs::initJvmFunctions() {
|
||||
if (VM::hotspot_version() == 8) {
|
||||
_lock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor28lock_without_safepoint_checkEv");
|
||||
_unlock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor6unlockEv");
|
||||
}
|
||||
|
||||
if (VM::hotspot_version() > 0) {
|
||||
CodeBlob* blob = _libjvm->findBlob("_ZNK5frame26is_interpreted_frame_validEP10JavaThread");
|
||||
if (blob != NULL) {
|
||||
_interpreted_frame_valid_start = blob->_start;
|
||||
_interpreted_frame_valid_end = blob->_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VMStructs::patchSafeFetch() {
|
||||
// Workarounds for JDK-8307549 and JDK-8321116
|
||||
if (WX_MEMORY && VM::hotspot_version() == 17) {
|
||||
@@ -623,10 +616,12 @@ void VMStructs::initThreadBridge() {
|
||||
|
||||
VMThread* vm_thread = VMThread::fromJavaThread(env, thread);
|
||||
if (vm_thread != NULL) {
|
||||
_has_native_thread_id = _thread_osthread_offset >= 0 && _osthread_id_offset >= 0;
|
||||
initTLS(vm_thread);
|
||||
_env_offset = (intptr_t)env - (intptr_t)vm_thread;
|
||||
memcpy(_java_thread_vtbl, vm_thread->vtable(), sizeof(_java_thread_vtbl));
|
||||
if (!VM::isZing()) {
|
||||
_has_native_thread_id = _thread_osthread_offset >= 0 && _osthread_id_offset >= 0;
|
||||
_env_offset = (intptr_t)env - (intptr_t)vm_thread;
|
||||
memcpy(_java_thread_vtbl, vm_thread->vtable(), sizeof(_java_thread_vtbl));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,11 +8,32 @@
|
||||
|
||||
#include <jvmti.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <type_traits>
|
||||
#include "codeCache.h"
|
||||
|
||||
|
||||
// Byte-by-byte equality check against a string literal, done inline so that
// no call to strcmp is needed. All N bytes of the literal take part in the
// comparison, i.e. the terminating NUL must match as well.
template<size_t N>
static bool streq(const char* s, const char (&pattern)[N]) {
    size_t pos = 0;
    // Stop at the first differing byte; s is never read past that point
    while (pos < N && s[pos] == pattern[pos]) {
        pos++;
    }
    return pos == N;
}
|
||||
|
||||
// Inline prefix check against a string literal: only the N - 1 visible
// characters of the literal are compared, the terminating NUL is left out.
template<size_t N>
static bool startsWith(const char* s, const char (&pattern)[N]) {
    size_t pos = 0;
    // Stop at the first differing byte; s is never read past that point
    while (pos < N - 1 && s[pos] == pattern[pos]) {
        pos++;
    }
    return pos == N - 1;
}
|
||||
|
||||
|
||||
class NMethod;
|
||||
class VMMethod;
|
||||
|
||||
class VMStructs {
|
||||
protected:
|
||||
enum { MONITOR_BIT = 2 };
|
||||
@@ -111,8 +132,8 @@ class VMStructs {
|
||||
static unsigned char _unsigned5_base;
|
||||
static const void** _call_stub_return_addr;
|
||||
static const void* _call_stub_return;
|
||||
static const void* _interpreted_frame_valid_start;
|
||||
static const void* _interpreted_frame_valid_end;
|
||||
static const void* _interpreter_start;
|
||||
static NMethod* _interpreter_nm;
|
||||
|
||||
static jfieldID _eetop;
|
||||
static jfieldID _tid;
|
||||
@@ -129,7 +150,6 @@ class VMStructs {
|
||||
static void initOffsets();
|
||||
static void resolveOffsets();
|
||||
static void patchSafeFetch();
|
||||
static void initJvmFunctions();
|
||||
static void initTLS(void* vm_thread);
|
||||
static void initThreadBridge();
|
||||
|
||||
@@ -182,10 +202,6 @@ class VMStructs {
|
||||
static bool hasJavaThreadId() {
|
||||
return _tid != NULL;
|
||||
}
|
||||
|
||||
static bool isInterpretedFrameValidFunc(const void* pc) {
|
||||
return pc >= _interpreted_frame_valid_start && pc < _interpreted_frame_valid_end;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -207,10 +223,6 @@ class MethodList {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class NMethod;
|
||||
class VMMethod;
|
||||
|
||||
class VMSymbol : VMStructs {
|
||||
public:
|
||||
unsigned short length() {
|
||||
@@ -300,7 +312,7 @@ class VMKlass : VMStructs {
|
||||
}
|
||||
|
||||
jmethodID* jmethodIDs() {
|
||||
return __atomic_load_n((jmethodID**) at(_jmethod_ids_offset), __ATOMIC_ACQUIRE);
|
||||
return loadAcquire(*(jmethodID**) at(_jmethod_ids_offset));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -329,10 +341,6 @@ class JavaFrameAnchor : VMStructs {
|
||||
return *(const void**) at(_anchor_pc_offset);
|
||||
}
|
||||
|
||||
void setLastJavaPC(const void* pc) {
|
||||
*(const void**) at(_anchor_pc_offset) = pc;
|
||||
}
|
||||
|
||||
bool getFrame(const void*& pc, uintptr_t& sp, uintptr_t& fp) {
|
||||
if (lastJavaPC() == NULL || lastJavaSP() == 0) {
|
||||
return false;
|
||||
@@ -401,10 +409,6 @@ class VMThread : VMStructs {
|
||||
return _thread_state_offset >= 0 ? *(int*) at(_thread_state_offset) : 0;
|
||||
}
|
||||
|
||||
bool inJava() {
|
||||
return state() == 8;
|
||||
}
|
||||
|
||||
bool inDeopt() {
|
||||
return *(void**) at(_thread_vframe_offset) != NULL;
|
||||
}
|
||||
@@ -462,15 +466,6 @@ class NMethod : VMStructs {
|
||||
return *(short*) at(_frame_complete_offset);
|
||||
}
|
||||
|
||||
void setFrameCompleteOffset(int offset) {
|
||||
if (_nmethod_immutable_offset > 0) {
|
||||
// _frame_complete_offset is short on JDK 23+
|
||||
*(short*) at(_frame_complete_offset) = offset;
|
||||
} else {
|
||||
*(int*) at(_frame_complete_offset) = offset;
|
||||
}
|
||||
}
|
||||
|
||||
const char* immutableDataAt(int offset) {
|
||||
if (_nmethod_immutable_offset > 0) {
|
||||
return *(const char**) at(_nmethod_immutable_offset) + offset;
|
||||
@@ -518,24 +513,23 @@ class NMethod : VMStructs {
|
||||
return *(const char**) at(_nmethod_name_offset);
|
||||
}
|
||||
|
||||
bool isNMethod() {
|
||||
const char* n = name();
|
||||
return n != NULL && (strcmp(n, "nmethod") == 0 || strcmp(n, "native nmethod") == 0);
|
||||
bool isInterpreter() {
|
||||
return this == _interpreter_nm;
|
||||
}
|
||||
|
||||
bool isInterpreter() {
|
||||
bool isNMethod() {
|
||||
const char* n = name();
|
||||
return n != NULL && strcmp(n, "Interpreter") == 0;
|
||||
return n != NULL && (streq(n, "nmethod") || streq(n, "native nmethod"));
|
||||
}
|
||||
|
||||
bool isStub() {
|
||||
const char* n = name();
|
||||
return n != NULL && strncmp(n, "StubRoutines", 12) == 0;
|
||||
return n != NULL && startsWith(n, "StubRoutines");
|
||||
}
|
||||
|
||||
bool isVTableStub() {
|
||||
const char* n = name();
|
||||
return n != NULL && strcmp(n, "vtable chunks") == 0;
|
||||
return n != NULL && startsWith(n, "vtable chunks");
|
||||
}
|
||||
|
||||
VMMethod* method() {
|
||||
@@ -546,10 +540,6 @@ class NMethod : VMStructs {
|
||||
return *at(_nmethod_state_offset);
|
||||
}
|
||||
|
||||
bool isAlive() {
|
||||
return state() >= 0 && state() <= 1;
|
||||
}
|
||||
|
||||
int level() {
|
||||
return _nmethod_level_offset >= 0 ? *(signed char*) at(_nmethod_level_offset) : 0;
|
||||
}
|
||||
@@ -596,6 +586,11 @@ class CodeHeap : VMStructs {
|
||||
high = _code_heap_high);
|
||||
}
|
||||
|
||||
static void setInterpreterStart(const void* start) {
|
||||
_interpreter_start = start;
|
||||
_interpreter_nm = findNMethod(start);
|
||||
}
|
||||
|
||||
static NMethod* findNMethod(const void* pc) {
|
||||
if (contains(_code_heap[0], pc)) return findNMethod(_code_heap[0], pc);
|
||||
if (contains(_code_heap[1], pc)) return findNMethod(_code_heap[1], pc);
|
||||
|
||||
@@ -67,7 +67,7 @@ class ThreadCpuTimeBuffer {
|
||||
void reset() {
|
||||
memset(_ringbuf, 0, sizeof(_ringbuf));
|
||||
_read_ptr = 0;
|
||||
__atomic_store_n(&_write_ptr, 0, __ATOMIC_RELEASE);
|
||||
storeRelease(_write_ptr, 0);
|
||||
}
|
||||
|
||||
void add(u64 trace) {
|
||||
|
||||
@@ -8,10 +8,21 @@ package one.profiler.test;
|
||||
import java.io.File;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.util.HashSet;
|
||||
import java.util.*;
|
||||
import java.util.logging.Handler;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.atomic.AtomicIntegerArray;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
|
||||
|
||||
public class Runner {
|
||||
private static final Logger log = Logger.getLogger(Runner.class.getName());
|
||||
@@ -60,13 +71,8 @@ public class Runner {
|
||||
File javaHome = new File(System.getProperty("java.home"));
|
||||
|
||||
// Look for OpenJ9-specific file
|
||||
File[] files = new File(javaHome, "lib").listFiles();
|
||||
if (files != null) {
|
||||
for (File file : files) {
|
||||
if (file.getName().equals("J9TraceFormat.dat")) {
|
||||
return Jvm.OPENJ9;
|
||||
}
|
||||
}
|
||||
if (new File(javaHome, "lib/J9TraceFormat.dat").exists()) {
|
||||
return Jvm.OPENJ9;
|
||||
}
|
||||
|
||||
// Strip /jre from JDK 8 path
|
||||
@@ -74,17 +80,12 @@ public class Runner {
|
||||
javaHome = javaHome.getParentFile();
|
||||
}
|
||||
|
||||
// Workaround for Contents/Home on macOS
|
||||
if (currentOs == Os.MACOS) {
|
||||
javaHome = javaHome.getParentFile();
|
||||
}
|
||||
|
||||
// Look for Zing-specific file
|
||||
if (new File(javaHome, "etc/zing").exists()) {
|
||||
return Jvm.ZING;
|
||||
}
|
||||
|
||||
if (!new File(System.getProperty("java.home"), "lib/" + System.mapLibraryName("jvmcicompiler")).exists()) {
|
||||
if (!new File(javaHome, "lib/" + System.mapLibraryName("jvmcicompiler")).exists()) {
|
||||
return Jvm.HOTSPOT_C2;
|
||||
}
|
||||
|
||||
@@ -110,7 +111,7 @@ public class Runner {
|
||||
(jvmVer.length == 0 || (currentJvmVersion >= jvmVer[0] && currentJvmVersion <= jvmVer[jvmVer.length - 1]));
|
||||
}
|
||||
|
||||
private static TestResult run(RunnableTest rt, TestDeclaration decl) {
|
||||
private static TestResult runTest(RunnableTest rt, TestDeclaration decl) {
|
||||
if (!rt.test().enabled() || decl.skips(rt.method())) {
|
||||
return TestResult.skipDisabled();
|
||||
}
|
||||
@@ -156,26 +157,41 @@ public class Runner {
|
||||
}
|
||||
}
|
||||
|
||||
private static void printSummary(EnumMap<TestStatus, Integer> statusCounts, List<String> failedTests, long totalTestDuration, int testCount) {
|
||||
int fail = statusCounts.getOrDefault(TestStatus.FAIL, 0);
|
||||
private static void printSummary(AtomicIntegerArray statusCounts, Set<String> failedTests, long totalTestDuration, long executionDuration, int testCount) {
|
||||
int fail = statusCounts.get(TestStatus.FAIL.ordinal());
|
||||
if (fail > 0) {
|
||||
System.out.println("\nFailed tests:");
|
||||
failedTests.forEach(System.out::println);
|
||||
}
|
||||
|
||||
int pass = statusCounts.getOrDefault(TestStatus.PASS, 0);
|
||||
String totalDuration = String.format("%.3f s", totalTestDuration / 1e9);
|
||||
int pass = statusCounts.get(TestStatus.PASS.ordinal());
|
||||
|
||||
System.out.println("\nTotal test duration: " + totalDuration);
|
||||
System.out.printf("\nTotal test duration: %.3f s\n", totalTestDuration / 1e9);
|
||||
System.out.printf("Actual execution duration: %.3f s\n", executionDuration / 1e9);
|
||||
System.out.println("Results Summary:");
|
||||
System.out.printf("PASS: %d (%.1f%%)\n", pass, 100.0 * pass / (pass + fail));
|
||||
System.out.println("FAIL: " + fail);
|
||||
System.out.println("SKIP (disabled): " + statusCounts.getOrDefault(TestStatus.SKIP_DISABLED, 0));
|
||||
System.out.println("SKIP (config mismatch): " + statusCounts.getOrDefault(TestStatus.SKIP_CONFIG_MISMATCH, 0));
|
||||
System.out.println("SKIP (missing JAR): " + statusCounts.getOrDefault(TestStatus.SKIP_MISSING_JAR, 0));
|
||||
System.out.println("SKIP (disabled): " + statusCounts.get(TestStatus.SKIP_DISABLED.ordinal()));
|
||||
System.out.println("SKIP (config mismatch): " + statusCounts.get(TestStatus.SKIP_CONFIG_MISMATCH.ordinal()));
|
||||
System.out.println("SKIP (missing JAR): " + statusCounts.get(TestStatus.SKIP_MISSING_JAR.ordinal()));
|
||||
System.out.println("TOTAL: " + testCount);
|
||||
}
|
||||
|
||||
private static void waitForExecutorTermination(ThreadPoolExecutor executor) {
|
||||
executor.shutdown(); // Initiate orderly shutdown
|
||||
try {
|
||||
// Wait for all tasks to finish, setting this high for future proofing
|
||||
boolean terminated = executor.awaitTermination(6, TimeUnit.HOURS);
|
||||
if (terminated) {
|
||||
System.out.println("All tasks finished and executor is terminated.");
|
||||
} else {
|
||||
System.out.println("Timeout occurred before all tasks finished.");
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt(); // Restore the interrupt flag
|
||||
System.err.println("Main thread interrupted while waiting for termination.");
|
||||
}
|
||||
}
|
||||
public static void main(String[] args) throws Exception {
|
||||
configureLogging();
|
||||
|
||||
@@ -183,38 +199,66 @@ public class Runner {
|
||||
List<RunnableTest> allTests = decl.getRunnableTests();
|
||||
final int testCount = allTests.size();
|
||||
final int retryCount = Integer.parseInt(System.getProperty("retryCount", "0"));
|
||||
final int threadCount = Integer.parseInt(System.getProperty("threadCount", "8"));
|
||||
|
||||
int i = 1;
|
||||
long totalTestDuration = 0;
|
||||
List<String> failedTests = new ArrayList<>();
|
||||
EnumMap<TestStatus, Integer> statusCounts = new EnumMap<>(TestStatus.class);
|
||||
log.log(Level.INFO, "Running with " + threadCount + " test threads.");
|
||||
|
||||
AtomicLong i = new AtomicLong(1);
|
||||
AtomicLong totalTestDuration = new AtomicLong();
|
||||
Set<String> failedTests = ConcurrentHashMap.newKeySet();
|
||||
AtomicIntegerArray statusCounts = new AtomicIntegerArray(TestStatus.values().length);
|
||||
|
||||
final ThreadPoolExecutor executor = new ThreadPoolExecutor(threadCount, threadCount, 60L, TimeUnit.SECONDS, new ArrayBlockingQueue<>(testCount));
|
||||
final ThreadPoolExecutor singleExecutor = new ThreadPoolExecutor(1, 1, 60L, TimeUnit.SECONDS, new ArrayBlockingQueue<>(testCount));
|
||||
|
||||
final ArrayList<Callable<TestResult>> multithreadedTests = new ArrayList<>();
|
||||
final ArrayList<Callable<TestResult>> singlethreadedTests = new ArrayList<>();
|
||||
|
||||
long startTime = System.nanoTime();
|
||||
for (RunnableTest rt : allTests) {
|
||||
long start = System.nanoTime();
|
||||
TestResult result = run(rt, decl);
|
||||
Callable<TestResult> task = () -> {
|
||||
long start = System.nanoTime();
|
||||
TestResult result = runTest(rt, decl);
|
||||
|
||||
int attempt = 1;
|
||||
while (result.status() == TestStatus.FAIL && attempt <= retryCount) {
|
||||
log.log(Level.WARNING, "Test failed, retrying (attempt " + attempt + "/" + retryCount + ")...");
|
||||
result = run(rt, decl);
|
||||
attempt++;
|
||||
int attempt = 1;
|
||||
while (result.status() == TestStatus.FAIL && attempt <= retryCount) {
|
||||
log.log(Level.WARNING, "Test failed, retrying (attempt " + attempt + "/" + retryCount + ")...");
|
||||
result = runTest(rt, decl);
|
||||
attempt++;
|
||||
}
|
||||
|
||||
long durationNs = System.nanoTime() - start;
|
||||
|
||||
totalTestDuration.addAndGet(durationNs);
|
||||
// Using ordinal here should be fine since it isn't persisted and is just used as a lookup here.
|
||||
statusCounts.incrementAndGet(result.status().ordinal());
|
||||
|
||||
if (result.status() == TestStatus.FAIL) {
|
||||
failedTests.add(rt.testInfo());
|
||||
}
|
||||
|
||||
System.out.printf("%s [%d/%d] tid[%d] %s took %.3f s\n", result.status(), i.getAndIncrement(), testCount, Thread.currentThread().getId(), rt.testInfo(), durationNs / 1e9);
|
||||
if (result.throwable() != null) {
|
||||
result.throwable().printStackTrace(System.out);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
if (rt.test().runIsolated()) {
|
||||
singlethreadedTests.add(task);
|
||||
} else {
|
||||
multithreadedTests.add(task);
|
||||
}
|
||||
|
||||
long durationNs = System.nanoTime() - start;
|
||||
|
||||
totalTestDuration += durationNs;
|
||||
statusCounts.put(result.status(), statusCounts.getOrDefault(result.status(), 0) + 1);
|
||||
if (result.status() == TestStatus.FAIL) {
|
||||
failedTests.add(rt.testInfo());
|
||||
}
|
||||
|
||||
System.out.printf("%s [%d/%d] %s took %.3f s\n", result.status(), i, testCount, rt.testInfo(), durationNs / 1e9);
|
||||
if (result.throwable() != null) {
|
||||
result.throwable().printStackTrace(System.out);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
printSummary(statusCounts, failedTests, totalTestDuration, testCount);
|
||||
executor.invokeAll(multithreadedTests);
|
||||
waitForExecutorTermination(executor);
|
||||
|
||||
log.log(Level.INFO, "Starting single threaded tests...");
|
||||
singleExecutor.invokeAll(singlethreadedTests);
|
||||
waitForExecutorTermination(singleExecutor);
|
||||
|
||||
long endTime = System.nanoTime();
|
||||
printSummary(statusCounts, failedTests, totalTestDuration.get(), endTime - startTime, testCount);
|
||||
|
||||
if (!logDir.isEmpty()) {
|
||||
log.log(Level.INFO, "Test output and profiles are available in " + logDir + " directory");
|
||||
|
||||
@@ -48,4 +48,6 @@ public @interface Test {
|
||||
String[] inputs() default {};
|
||||
|
||||
String nameSuffix() default "";
|
||||
|
||||
boolean runIsolated() default false;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
package test.alloc;
|
||||
|
||||
import jdk.jfr.consumer.RecordedEvent;
|
||||
import jdk.jfr.consumer.RecordingFile;
|
||||
import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.AllocationSample;
|
||||
@@ -30,7 +32,7 @@ public class AllocTests {
|
||||
assert out.contains("java\\.lang\\.String\\[]");
|
||||
}
|
||||
|
||||
@Test(mainClass = MapReaderOpt.class, jvmArgs = "-XX:+UseParallelGC -Xmx1g -Xms1g", jvm = {Jvm.HOTSPOT, Jvm.ZING})
|
||||
@Test(mainClass = MapReaderOpt.class, jvmArgs = "-XX:+UseParallelGC -Xmx1g -Xms1g", jvm = {Jvm.HOTSPOT, Jvm.ZING}, runIsolated = true)
|
||||
public void allocTotal(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-e alloc -d 3 -o collapsed --total");
|
||||
assert out.samples("java.util.HashMap\\$Node\\[]") > 1_000_000;
|
||||
@@ -41,7 +43,7 @@ public class AllocTests {
|
||||
assert out.contains("java\\.util\\.HashMap\\$Node\\[]");
|
||||
}
|
||||
|
||||
@Test(mainClass = Hello.class, agentArgs = "start,event=alloc,alloc=1,cstack=fp,flamegraph,file=%f", jvmArgs = "-XX:+UseG1GC -XX:-UseTLAB")
|
||||
@Test(mainClass = Hello.class, agentArgs = "start,event=alloc,alloc=1,cstack=fp,flamegraph,file=%f", jvmArgs = "-XX:+UseG1GC -XX:-UseTLAB", runIsolated = true)
|
||||
public void startup(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
out = out.convertFlameToCollapsed();
|
||||
@@ -57,8 +59,27 @@ public class AllocTests {
|
||||
public void humongous(TestProcess p) throws Exception {
|
||||
Thread.sleep(1000);
|
||||
Output out = p.profile("stop -o collapsed");
|
||||
assert out.contains("java/io/ByteArrayOutputStream.toByteArray;");
|
||||
assert out.contains("G1CollectedHeap::humongous_obj_allocate");
|
||||
assert out.contains("java/io/ByteArrayOutputStream") : out;
|
||||
assert out.contains("G1CollectedHeap::humongous_obj_allocate") : out;
|
||||
}
|
||||
|
||||
@Test(mainClass = MapReaderOpt.class)
|
||||
public void tlabAllocSampler(TestProcess p) throws Exception {
|
||||
p.profile("-e alloc --tlab -d 3 -f %profile.jfr");
|
||||
boolean tlabEvent = false;
|
||||
|
||||
try (RecordingFile recordingFile = new RecordingFile(p.getFile("%profile").toPath())) {
|
||||
while (recordingFile.hasMoreEvents()) {
|
||||
RecordedEvent event = recordingFile.readEvent();
|
||||
String eventName = event.getEventType().getName();
|
||||
if (eventName != null && eventName.equals("jdk.ObjectAllocationOutsideTLAB")) {
|
||||
tlabEvent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert tlabEvent : "No jdk.ObjectAllocationOutsideTLAB event was found";
|
||||
}
|
||||
|
||||
@Test(mainClass = MapReaderOpt.class, jvmVer = {11, Integer.MAX_VALUE})
|
||||
|
||||
@@ -32,7 +32,7 @@ public class CpuTests {
|
||||
}
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX)
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX, runIsolated = true)
|
||||
public void ctimerTotal(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 2 -e ctimer -i 100ms --total -o collapsed");
|
||||
assertCloseTo(out.total(), 2_000_000_000, "ctimer total should match profiling duration");
|
||||
@@ -41,13 +41,13 @@ public class CpuTests {
|
||||
assertCloseTo(out.total(), 2_000_000_000, "ctimer total should not depend on the profiling interval");
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class)
|
||||
@Test(mainClass = CpuBurner.class, runIsolated = true)
|
||||
public void itimerTotal(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 2 -e itimer -i 100ms --total -o collapsed");
|
||||
assertCloseTo(out.total(), 2_000_000_000, "itimer total should match profiling duration");
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX)
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX, runIsolated = true)
|
||||
public void perfEventsTargetCpuEventsCount(TestProcess p) throws Exception {
|
||||
pinCpu(p, 0);
|
||||
|
||||
@@ -55,7 +55,7 @@ public class CpuTests {
|
||||
Assert.isEqual(outWrongCpu.total(), 0, "perf_events total should be 0 when the wrong CPU is targeted");
|
||||
|
||||
Output outRightCpu = p.profile("-d 2 -e cpu-clock -i 100ms --total -o collapsed --target-cpu 0");
|
||||
assertCloseTo(outRightCpu.total(), 2_000_000_000, "perf_events total should match profiling duration");
|
||||
Assert.isGreater(outRightCpu.total(), 100_000_000, "perf_events total should accumulate perf counter value");
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX)
|
||||
@@ -71,7 +71,7 @@ public class CpuTests {
|
||||
assert !output.contains("\\[CPU-0\\]");
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX)
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX, runIsolated = true)
|
||||
public void perfEventsTargetCpuWithFdtransferEventsCount(TestProcess p) throws Exception {
|
||||
pinCpu(p, 0);
|
||||
|
||||
@@ -79,7 +79,7 @@ public class CpuTests {
|
||||
Assert.isEqual(outWrongCpu.total(), 0, "perf_events total should be 0 when the wrong CPU is targeted");
|
||||
|
||||
Output outRightCpu = p.profile("-d 2 -e cpu-clock -i 100ms --total -o collapsed --target-cpu 0 --fdtransfer");
|
||||
assertCloseTo(outRightCpu.total(), 2_000_000_000, "perf_events total should match profiling duration");
|
||||
Assert.isGreater(outRightCpu.total(), 100_000_000, "perf_events total should accumulate perf counter value");
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, os = Os.LINUX)
|
||||
|
||||
@@ -18,12 +18,12 @@ public class CstackTests {
|
||||
@Test(mainClass = LongInitializer.class)
|
||||
public void asyncGetCallTrace(TestProcess p) throws Exception {
|
||||
Output out = p.profile(PROFILE_COMMAND + "--cstack no");
|
||||
assert !out.contains(";readBytes");
|
||||
assert out.contains("LongInitializer.main_\\[j]");
|
||||
assert !out.contains(";readBytes") : out;
|
||||
assert out.contains("LongInitializer.main_\\[j]") : out;
|
||||
|
||||
out = p.profile(PROFILE_COMMAND + "--cstack fp");
|
||||
assert out.contains(";readBytes");
|
||||
assert out.contains("LongInitializer.main_\\[j]");
|
||||
assert out.contains(";readBytes") : out;
|
||||
assert out.contains("LongInitializer.main_\\[j]") : out;
|
||||
}
|
||||
|
||||
@Test(mainClass = LongInitializer.class, jvm = Jvm.HOTSPOT, os = Os.LINUX)
|
||||
|
||||
73
test/test/depth/DeepRecursion.java
Normal file
73
test/test/depth/DeepRecursion.java
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.depth;
|
||||
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
/**
 * Endless workload that recurses through randomly chosen methods.
 *
 * <p>{@code dispatch()} picks one of {@code m0()}..{@code m3()} at random, and each of
 * them calls {@code dispatch()} again, so every recursion level contributes two frames.
 * Recursion stops once the depth counter would exceed the base depth plus a random
 * amount in {@code [0, varDepth]}.
 *
 * <p>Arguments: {@code [baseDepth [varDepth [print]]]}, defaulting to 100, 0 and false.
 */
public class DeepRecursion {
    private static int baseDepth;
    private static int varDepth;

    // Per-method call counters, indexed by method number
    private final int[] count = new int[4];
    // Depth counter: starts at 1 and moves in steps of two (one dispatch + one mN frame)
    private int depth = 1;

    private void m0() {
        count[0]++;
        dispatch();
    }

    private void m1() {
        count[1]++;
        dispatch();
    }

    private void m2() {
        count[2]++;
        dispatch();
    }

    private void m3() {
        count[3]++;
        dispatch();
    }

    private void dispatch() {
        ThreadLocalRandom random = ThreadLocalRandom.current();
        int limit = baseDepth + random.nextInt(varDepth + 1);
        if (depth + 2 <= limit) {
            depth += 2;
            int target = random.nextInt(4);
            if (target == 0) {
                m0();
            } else if (target == 1) {
                m1();
            } else if (target == 2) {
                m2();
            } else if (target == 3) {
                m3();
            }
            depth -= 2;
        }
    }

    public static void main(String[] args) throws Exception {
        baseDepth = args.length > 0 ? Integer.parseInt(args[0]) : 100;
        varDepth = args.length > 1 ? Integer.parseInt(args[1]) : 0;
        boolean print = args.length > 2 && Boolean.parseBoolean(args[2]);

        DeepRecursion test = new DeepRecursion();
        int calls = 0;
        // Runs until the process is terminated from outside
        while (true) {
            test.dispatch();
            if (print && calls % 1000000 == 0) {
                System.out.println("Made " + calls + " calls");
            }
            calls++;
        }
    }
}
|
||||
49
test/test/depth/DepthTests.java
Normal file
49
test/test/depth/DepthTests.java
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.depth;
|
||||
|
||||
import one.profiler.test.Assert;
|
||||
import one.profiler.test.Output;
|
||||
import one.profiler.test.Test;
|
||||
import one.profiler.test.TestProcess;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class DepthTests {
|
||||
|
||||
private static long frameCount(String stack) {
|
||||
return stack.chars().filter(c -> c == ';').count() + 1;
|
||||
}
|
||||
|
||||
@Test(mainClass = DeepRecursion.class)
|
||||
public void maxDepth(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-e cpu --all-user -d 2 -s -o collapsed -j 50");
|
||||
|
||||
// Overall profile depth is exactly 50 frames
|
||||
long depth = out.stream().mapToLong(DepthTests::frameCount).max().getAsLong();
|
||||
Assert.isEqual(depth, 50);
|
||||
|
||||
// At least some non-truncated stacks are smaller
|
||||
assert out.stream("^DeepRecursion.main").anyMatch(s -> frameCount(s) < 50);
|
||||
|
||||
// Flame graph has exactly 51 levels (+1 for the root frame)
|
||||
out = p.profile("stop -o flamegraph");
|
||||
assert out.containsExact("Array(51)");
|
||||
|
||||
out = p.profile("-e cpu --all-user -d 2 -s -o collapsed -j 50/20");
|
||||
|
||||
// Non-truncated stacks can be anything between 1 and 50 frames
|
||||
List<String> full = out.stream("^DeepRecursion.main").collect(Collectors.toList());
|
||||
assert full.stream().allMatch(s -> frameCount(s) < 50);
|
||||
assert full.stream().anyMatch(s -> frameCount(s) < 20);
|
||||
|
||||
// All truncated stacks start with [truncated] followed by exactly 20 frames
|
||||
List<String> truncated = out.stream("^DeepRecursion.(?!main)").collect(Collectors.toList());
|
||||
assert truncated.stream().allMatch(s -> s.startsWith("[truncated];"));
|
||||
assert truncated.stream().allMatch(s -> frameCount(s) == 21);
|
||||
}
|
||||
}
|
||||
@@ -385,7 +385,8 @@ public class InstrumentTests {
|
||||
agentArgs = "start,threads,trace=*.*:100ms,collapsed,file=%f",
|
||||
jvmArgs = "-Xverify:all",
|
||||
output = true,
|
||||
error = true
|
||||
error = true,
|
||||
runIsolated = true
|
||||
)
|
||||
// Smoke test: if any validation failure happens Instrument::BytecodeRewriter has a bug
|
||||
public void latencyAll(TestProcess p) throws Exception {
|
||||
|
||||
@@ -42,7 +42,7 @@ public class JfrTests {
|
||||
assert out.contains(normalLoadPattern);
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuLoad.class, agentArgs = "start,event=cpu,wall,record-cpu,file=%profile.jfr", os = Os.LINUX)
|
||||
@Test(mainClass = CpuLoad.class, agentArgs = "start,event=cpu,alluser,wall,record-cpu,file=%profile.jfr", os = Os.LINUX)
|
||||
public void recordCpuMultiEngine(TestProcess p) throws Exception {
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
@@ -5,13 +5,19 @@
|
||||
|
||||
package test.jfrconverter;
|
||||
|
||||
import test.otlp.CpuBurner;
|
||||
import one.convert.*;
|
||||
import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.Event;
|
||||
import one.jfr.event.EventCollector;
|
||||
import one.jfr.StackTrace;
|
||||
import one.profiler.test.*;
|
||||
import one.profiler.test.Output;
|
||||
import one.profiler.test.Test;
|
||||
import one.profiler.test.TestProcess;
|
||||
import test.otlp.CpuBurner;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
// Simple smoke tests for JFR converter. The output is not inspected for errors,
|
||||
// we only verify that the conversion completes successfully.
|
||||
@@ -33,7 +39,7 @@ public class JfrconverterTests {
|
||||
JfrToFlame.convert(p.getFilePath("%f"), "/dev/null", new Arguments("--alloc"));
|
||||
}
|
||||
|
||||
@Test(mainClass = Tracer.class, agentArgs = "start,jfr,wall,trace=test.jfrconverter.Tracer.traceMethod,file=%f")
|
||||
@Test(mainClass = Tracer.class, agentArgs = "start,jfr,wall,trace=test.jfrconverter.Tracer.traceMethod,file=%f", runIsolated = true)
|
||||
public void latencyFilter(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
assert p.exitCode() == 0;
|
||||
@@ -72,4 +78,33 @@ public class JfrconverterTests {
|
||||
assert !found[3];
|
||||
}
|
||||
}
|
||||
|
||||
@Test(mainClass = Main.class, args = "--diff test/test/jfrconverter/sample1.collapsed test/test/jfrconverter/sample2.collapsed %diff.collapsed")
|
||||
public void diffCollapsed(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%diff");
|
||||
assert out.containsExact("BusyClient.run_[j] 4 1");
|
||||
assert out.containsExact("BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j] 2 2");
|
||||
assert out.containsExact("ByteBuffer.get_[i];ByteBuffer.getArray_[i] 0 1");
|
||||
assert out.samples("ByteBuffer.get") == 2;
|
||||
}
|
||||
|
||||
@Test(mainClass = Main.class, args = "--diff test/test/jfrconverter/sample1.collapsed test/test/jfrconverter/sample2.collapsed %diff.html")
|
||||
public void diffHtml(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%diff");
|
||||
assert out.containsExact("d=-3");
|
||||
assert out.containsExact("d=0");
|
||||
assert out.containsExact("d=U");
|
||||
|
||||
// It should be possible to reconstruct original FlameGraph from the differential one
|
||||
byte[] original = buildFlameGraph("test/test/jfrconverter/sample2.collapsed");
|
||||
byte[] reconstructed = buildFlameGraph(p.getFilePath("%diff"));
|
||||
assert Arrays.equals(original, reconstructed);
|
||||
}
|
||||
|
||||
private static byte[] buildFlameGraph(String input) throws IOException {
|
||||
FlameGraph fg = FlameGraph.parse(input, new Arguments());
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
fg.dump(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
|
||||
17
test/test/jfrconverter/sample1.collapsed
Normal file
17
test/test/jfrconverter/sample1.collapsed
Normal file
@@ -0,0 +1,17 @@
|
||||
BusyClient.run_[j] 4
|
||||
BusyClient.run_[j];InputStream.read_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j] 2
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.beginRead_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.beginRead_[i];NativeThread.current_[i];NativeThread.current0_[j] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.endRead_[i] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];Util.getTemporaryDirectBuffer_[i];CarrierThreadLocal.get_[i];System$2.getCarrierThreadLocal_[i];ThreadLocal.getCarrierThreadLocal_[i];jlong_disjoint_arraycopy 15
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];Util.getTemporaryDirectBuffer_[i];Util$BufferCache.get_[i];Buffer.capacity_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i];SocketDispatcher.read0_[j] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i];SocketDispatcher.read0_[j];Java_sun_nio_ch_SocketDispatcher_read0;read 143
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];ReentrantLock.lock_[i];ReentrantLock$Sync.lock_[i];ReentrantLock$NonfairSync.initialTryLock_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];ReentrantLock.lock_[i];ReentrantLock$Sync.lock_[i];ReentrantLock$NonfairSync.initialTryLock_[i];AbstractQueuedSynchronizer.compareAndSetState_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];ReentrantLock.unlock_[i];AbstractQueuedSynchronizer.release_[i];ReentrantLock$Sync.tryRelease_[i];AbstractQueuedSynchronizer.setState_[i] 1
|
||||
17
test/test/jfrconverter/sample2.collapsed
Normal file
17
test/test/jfrconverter/sample2.collapsed
Normal file
@@ -0,0 +1,17 @@
|
||||
BusyClient.run_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j] 2
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.beginRead_[i] 3
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.beginRead_[i];NativeThread.current_[i];NativeThread.current0_[j] 4
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.endRead_[i] 4
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];ByteBuffer.get_[i];ByteBuffer.getArray_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];ByteBuffer.get_[i];Buffer.position_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];Util.getTemporaryDirectBuffer_[i];CarrierThreadLocal.get_[i];System$2.getCarrierThreadLocal_[i];ThreadLocal.getCarrierThreadLocal_[i];jlong_disjoint_arraycopy 6
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i];SocketDispatcher.read0_[j] 4
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i];SocketDispatcher.read0_[j];Java_sun_nio_ch_SocketDispatcher_read0 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];NioSocketImpl.implRead_[j];NioSocketImpl.tryRead_[j];SocketDispatcher.read_[i];SocketDispatcher.read0_[j];Java_sun_nio_ch_SocketDispatcher_read0;read 151
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];ReentrantLock.lock_[i] 1
|
||||
BusyClient.run_[j];InputStream.read_[j];Socket$SocketInputStream.read_[j];NioSocketImpl$1.read_[j];NioSocketImpl.read_[j];ReentrantLock.unlock_[i];AbstractQueuedSynchronizer.release_[i];ReentrantLock$Sync.tryRelease_[i];AbstractQueuedSynchronizer.setState_[i] 3
|
||||
@@ -37,7 +37,7 @@ public class NativelockTests {
|
||||
assert out.contains("pthread_rwlock_wrlock_hook") : "No wrlock samples captured with LD_PRELOAD";
|
||||
}
|
||||
|
||||
@Test(sh = "LD_PRELOAD=%lib ASPROF_COMMAND=start,nativelock,file=%f.jfr %testbin/native_lock_contention", os = Os.LINUX)
|
||||
@Test(sh = "LD_PRELOAD=%lib ASPROF_COMMAND=start,nativelock,file=%f.jfr %testbin/native_lock_contention", os = Os.LINUX, runIsolated = true)
|
||||
public void nativeAllLockContention(TestProcess p) throws Exception {
|
||||
p.waitForExit();
|
||||
Output out = Output.convertJfrToCollapsed(p.getFilePath("%f"), "--nativelock");
|
||||
|
||||
@@ -32,7 +32,7 @@ public class NonjavaTests {
|
||||
}
|
||||
|
||||
// jvm is loaded between two profiling sessions
|
||||
@Test(sh = "%testbin/non_java_app 3 %f.collapsed %s.collapsed", output = true)
|
||||
@Test(sh = "%testbin/non_java_app 3 %f.collapsed %s.collapsed", output = true, runIsolated = true)
|
||||
public void jvmInBetween(TestProcess p) throws Exception {
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
@@ -51,7 +51,7 @@ public class OtlpTests {
|
||||
|
||||
@Test(mainClass = CpuBurner.class, agentArgs = "start,jfr,file=%f")
|
||||
public void threadNameFromJfr(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
ProfilesData profilesData = profilesDataFromJfr(p.getFilePath("%f"), new Arguments("--cpu", "--output", "otlp"));
|
||||
@@ -76,7 +76,7 @@ public class OtlpTests {
|
||||
|
||||
@Test(mainClass = CpuBurner.class, agentArgs = "start,jfr,file=%f")
|
||||
public void samplesFromJfr(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
ProfilesData profilesData = profilesDataFromJfr(p.getFilePath("%f"), new Arguments("--cpu", "--output", "otlp"));
|
||||
@@ -88,6 +88,20 @@ public class OtlpTests {
|
||||
assert collapsed.containsExact("test/otlp/CpuBurner.lambda$main$0;test/otlp/CpuBurner.burn") : collapsed;
|
||||
}
|
||||
|
||||
@Test(mainClass = CpuBurner.class, agentArgs = "start,jfr,file=%f")
|
||||
public void nonAggregatedSamplesFromJfr(TestProcess p) throws Exception {
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
ProfilesData profilesData = profilesDataFromJfr(p.getFilePath("%f"), new Arguments("--cpu", "--output", "otlp"));
|
||||
boolean found = false;
|
||||
for (Sample sample : getProfile(profilesData, 0).getSamplesList()) {
|
||||
assert(sample.getValuesList().size() == sample.getTimestampsUnixNanoList().size());
|
||||
found = found || sample.getValuesList().size() > 1;
|
||||
}
|
||||
assert found : "No sample contains more than one value/timestamp pair";
|
||||
}
|
||||
|
||||
@Test(mainClass = OtlpProfileTimeTest.class)
|
||||
public void profileTime(TestProcess p) throws Exception {
|
||||
classpathCheck();
|
||||
@@ -98,7 +112,7 @@ public class OtlpTests {
|
||||
|
||||
@Test(mainClass = CpuBurner.class, agentArgs = "start,jfr,file=%f")
|
||||
public void profileTimeFromJfr(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
p.waitForExit();
|
||||
assert p.exitCode() == 0;
|
||||
|
||||
ProfilesData profilesData = profilesDataFromJfr(p.getFilePath("%f"), new Arguments("--cpu", "--output", "otlp"));
|
||||
@@ -140,7 +154,7 @@ public class OtlpTests {
|
||||
for (int i = locations.size() - 1; i > 0; --i) {
|
||||
stackTrace.append(getFrameName(locations.get(i), dictionary)).append(';');
|
||||
}
|
||||
stackTrace.append(getFrameName(locations.get(locations.size() - 1), dictionary));
|
||||
stackTrace.append(getFrameName(locations.get(0), dictionary));
|
||||
|
||||
stackTracesCount.compute(stackTrace.toString(), (key, oldValue) -> sample.getValues(valueIdx) + (oldValue == null ? 0 : oldValue));
|
||||
}
|
||||
|
||||
@@ -16,13 +16,9 @@ public class RecoveryTests {
|
||||
|
||||
@Test(mainClass = StringBuilderTest.class, jvmArgs = "-XX:UseAVX=2", arch = {Arch.X64, Arch.X86}, debugNonSafepoints = true)
|
||||
public void stringBuilder(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 3 -e cpu --cstack fp -o collapsed");
|
||||
Assert.isGreater(out.ratio("StringBuilder.delete;"), 0.8);
|
||||
Assert.isGreater(out.ratio("arraycopy"), 0.8);
|
||||
Assert.isLess(out.ratio("unknown_Java"), 0.01);
|
||||
|
||||
out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Output out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Assert.isGreater(out.ratio("StringBuilderTest.main;java/lang/StringBuilder.delete;"), 0.8);
|
||||
Assert.isGreater(out.ratio("arraycopy"), 0.8);
|
||||
Assert.isLess(out.ratio("unknown|break_compiled"), 0.005);
|
||||
}
|
||||
|
||||
@@ -36,35 +32,27 @@ public class RecoveryTests {
|
||||
jvmVer = {8, 17}
|
||||
)
|
||||
public void stringBuilderArm(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 3 -e cpu --cstack fp -o collapsed");
|
||||
Assert.isGreater(out.ratio("(forward|foward|backward)_copy_longs"), 0.8); // there's a typo on some JDK versions
|
||||
|
||||
out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Output out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Assert.isGreater(out.ratio("StringBuilderTest.main;java/lang/StringBuilder.delete;"), 0.8);
|
||||
Assert.isGreater(out.ratio("(forward|foward|backward)_copy_longs"), 0.8); // there's a typo on some JDK versions
|
||||
Assert.isLess(out.ratio("unknown|break_compiled"), 0.005);
|
||||
}
|
||||
|
||||
@Test(mainClass = Numbers.class, jvm = Jvm.HOTSPOT, debugNonSafepoints = true)
|
||||
public void numbers(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 3 -e cpu --cstack fp -o collapsed");
|
||||
if (p.currentJvm() == Jvm.HOTSPOT_C2) Assert.isGreater(out.ratio("vtable stub"), 0.01);
|
||||
Assert.isGreater(out.ratio("Numbers.loop"), 0.8);
|
||||
|
||||
out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Output out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Assert.isGreater(out.ratio("Numbers.main;test/recovery/Numbers.loop"), 0.8);
|
||||
Assert.isGreater(out.ratio("Numbers.main;test/recovery/Numbers.loop;test/recovery/Numbers.avg"), 0.5);
|
||||
Assert.isLess(out.ratio("unknown|break_compiled"), 0.005);
|
||||
if (p.currentJvm() == Jvm.HOTSPOT_C2) Assert.isGreater(out.ratio("vtable stub"), 0.01);
|
||||
}
|
||||
|
||||
@Test(mainClass = Suppliers.class, jvm = Jvm.HOTSPOT, debugNonSafepoints = true)
|
||||
public void suppliers(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 3 -e cpu --cstack fp -o collapsed");
|
||||
if (p.currentJvm() == Jvm.HOTSPOT_C2) Assert.isGreater(out.ratio("itable stub"), 0.01);
|
||||
Assert.isGreater(out.ratio("Suppliers.loop"), 0.5);
|
||||
|
||||
out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Output out = p.profile("-d 2 -e cpu -i 1ms -o collapsed");
|
||||
Assert.isGreater(out.ratio("Suppliers.main;test/recovery/Suppliers.loop"), 0.5);
|
||||
Assert.isLess(out.ratio("unknown|break_compiled"), 0.005);
|
||||
if (p.currentJvm() == Jvm.HOTSPOT_C2) Assert.isGreater(out.ratio("itable stub"), 0.01);
|
||||
}
|
||||
|
||||
@Test(mainClass = CodingIntrinsics.class, debugNonSafepoints = true, arch = {Arch.ARM64, Arch.X64})
|
||||
@@ -75,7 +63,7 @@ public class RecoveryTests {
|
||||
}
|
||||
|
||||
// Verify that System.currentTimeMillis() intrinsic is unwound correctly
|
||||
// TODO: Enable test on JDK 11 after fixing #1653
|
||||
// TODO: The test is flaky on JDK 11
|
||||
@Test(mainClass = TimeLoop.class, jvm = Jvm.HOTSPOT, jvmVer = {17, Integer.MAX_VALUE}, debugNonSafepoints = true)
|
||||
public void currentTimeMillis(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-d 3 -e cpu -o collapsed");
|
||||
|
||||
Reference in New Issue
Block a user