mirror of
https://github.com/async-profiler/async-profiler.git
synced 2026-04-28 02:53:00 +00:00
Native memory profiler (#1064)
This commit is contained in:
BIN
.assets/images/nativemem_flamegraph.png
Normal file
BIN
.assets/images/nativemem_flamegraph.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,3 +7,4 @@
|
||||
.vscode
|
||||
*.iml
|
||||
/src/api/**/*.class
|
||||
.gdb_history
|
||||
|
||||
6
Makefile
6
Makefile
@@ -56,7 +56,7 @@ CPP_TEST_HEADER := test/native/testRunner.hpp
|
||||
CPP_TEST_INCLUDES := -Isrc -Itest/native
|
||||
|
||||
ifeq ($(JAVA_HOME),)
|
||||
export JAVA_HOME:=$(shell java -cp . JavaHome)
|
||||
JAVA_HOME:=$(shell java -cp . JavaHome)
|
||||
endif
|
||||
|
||||
OS:=$(shell uname -s)
|
||||
@@ -197,6 +197,7 @@ build-test: build-test-cpp build-test-java
|
||||
build-test-libs:
|
||||
@mkdir -p $(TEST_LIB_DIR)
|
||||
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c
|
||||
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjnimalloc.$(SOEXT) test/native/libs/jnimalloc.c
|
||||
|
||||
test-cpp: build-test-cpp
|
||||
echo "Running cpp tests..."
|
||||
@@ -204,7 +205,8 @@ test-cpp: build-test-cpp
|
||||
|
||||
test-java: build-test-java
|
||||
echo "Running tests against $(LIB_PROFILER)"
|
||||
$(JAVA) $(TEST_FLAGS) -ea -cp "build/test.jar:build/jar/*:build/lib/*" one.profiler.test.Runner $(TESTS)
|
||||
|
||||
$(JAVA) "-Djava.library.path=$(TEST_LIB_DIR)" $(TEST_FLAGS) -ea -cp "build/test.jar:build/jar/*:build/lib/*" one.profiler.test.Runner $(TESTS)
|
||||
|
||||
coverage: override FAT_BINARY=false
|
||||
coverage: clean-coverage
|
||||
|
||||
10
README.md
10
README.md
@@ -13,7 +13,8 @@ What can be profiled:
|
||||
|
||||
- CPU time
|
||||
- Allocations in Java Heap
|
||||
- Contented locks
|
||||
- Native memory allocations and leaks
|
||||
- Contended locks
|
||||
- Hardware and software performance counters like cache misses, page faults, context switches
|
||||
- and [more](docs/ProfilingModes.md).
|
||||
|
||||
@@ -28,7 +29,12 @@ Current release (3.0):
|
||||
- Linux arm64: [async-profiler-3.0-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-arm64.tar.gz)
|
||||
- macOS x64/arm64: [async-profiler-3.0-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-macos.zip)
|
||||
- Converters between profile formats: [converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v3.0/converter.jar)
|
||||
(JFR to Flame Graph, JFR to pprof, collapsed stacks to Flame Graph)
|
||||
|
||||
| From | html | collapsed | pprof | pb.gz |
|
||||
| --------- | ---- | --------- | ----- | ----- |
|
||||
| collapsed | ✅ | ✅ | ❌ | ❌ |
|
||||
| html | ✅ | ✅ | ❌ | ❌ |
|
||||
| jfr | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
[Previous releases](https://github.com/async-profiler/async-profiler/releases)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Converter usage & demo
|
||||
# Converter Usage
|
||||
|
||||
async-profiler provides a converter utility to convert the profile output to other popular formats. async-profiler
|
||||
provides `jfrconv` as part of the compressed package which is found in the same location as the `asprof` binary. A
|
||||
@@ -6,18 +6,20 @@ standalone converter binary is also available [here](https://github.com/async-pro
|
||||
|
||||
## Supported conversions
|
||||
|
||||
- collapsed -> html, collapsed
|
||||
- html -> html, collapsed
|
||||
- jfr -> html, collapsed, pprof, pb.gz
|
||||
| From | html | collapsed | pprof | pb.gz |
|
||||
| --------- | ---- | --------- | ----- | ----- |
|
||||
| collapsed | ✅ | ✅ | ❌ | ❌ |
|
||||
| html | ✅ | ✅ | ❌ | ❌ |
|
||||
| jfr | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
## Usage
|
||||
|
||||
`jfrconv [options] <input> [<input>...] <output>`
|
||||
```
|
||||
jfrconv [options] <input> [<input>...] <output>
|
||||
```
|
||||
|
||||
Only one output format can be specified per conversion.
|
||||
|
||||
### Available arguments
|
||||
|
||||
```
|
||||
Conversion options:
|
||||
-o --output FORMAT, -o can be omitted if the output file extension unambiguously determines the format, e.g. profile.collapsed
|
||||
@@ -42,6 +44,8 @@ JFR options:
|
||||
--wall Generate only Wall clock profile during conversion
|
||||
--alloc Generate only Allocation profile during conversion
|
||||
--live Build allocation profile from live objects only during conversion
|
||||
--nativemem Generate native memory allocation profile
|
||||
--leak Only include memory leaks in nativemem
|
||||
--lock Generate only Lock contention profile during conversion
|
||||
-t --threads Split stack traces by threads
|
||||
-s --state LIST Filter thread states: runnable, sleeping, default. State name is case insensitive
|
||||
@@ -74,13 +78,13 @@ Flame Graph options:
|
||||
--highlight REGEX Highlight frames matching the given pattern
|
||||
```
|
||||
|
||||
### Example usages with `jfrconv`
|
||||
## `jfrconv` Examples
|
||||
|
||||
This section explains how the binary `jfrconv` can be used which exists in the same bin folder as
|
||||
`asprof`binary.
|
||||
`jfrconv` is built into the same location as the `asprof` binary.
|
||||
|
||||
The below command will generate a foo.html. If no output file is specified, it defaults to a
|
||||
Flame Graph output.
|
||||
### Generate flamegraph from jfr
|
||||
|
||||
If no output file is specified, it defaults to a Flame Graph output.
|
||||
|
||||
```
|
||||
jfrconv foo.jfr
|
||||
@@ -92,44 +96,34 @@ Flame Graph will have an aggregation of both in the view. Such a view wouldn't m
|
||||
hence it is advisable to use JFR conversion filter options like `--cpu` to filter out events
|
||||
during a conversion.
|
||||
|
||||
```
|
||||
jfrconv --cpu foo.jfr -o foo.html
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
jfrconv --cpu foo.jfr
|
||||
|
||||
# which is equivalent to:
|
||||
# jfrconv --cpu -o flamegraph foo.jfr foo.html
|
||||
```
|
||||
|
||||
for HTML output as HTML is the default format for conversion from JFR.
|
||||
|
||||
In case the conversion output is a Flame Graph, it can be further formatted with the use of flags
|
||||
specified above under `Flame Graph options`. The below command(s) will add a title string named `Title`
|
||||
to the Flame Graph instead of the default `Flame Graph` title and also will reverse the graph view
|
||||
by reversing the stack traces.
|
||||
#### Flame Graph options
|
||||
|
||||
To add a custom title to the generated Flame Graph, use `--title`, which has the default value `Flame Graph`:
|
||||
|
||||
```
|
||||
jfrconv --cpu foo.jfr foo.html -r --title Title
|
||||
jfrconv --cpu foo.jfr foo.html -r --title "Custom Title"
|
||||
```
|
||||
|
||||
or
|
||||
### Other formats
|
||||
|
||||
```
|
||||
jfrconv --cpu foo.jfr --reverse --title Title
|
||||
```
|
||||
`jfrconv` supports converting a JFR file to `collapsed`, `pprof` and `pb.gz` formats as well.
|
||||
|
||||
These are few common use cases. Similarly, a JFR output can be converted to `collapsed`, `pprof` and
|
||||
`pb.gz` formats based on specific needs.
|
||||
## Standalone converter examples
|
||||
|
||||
### Example usages with standalone converter
|
||||
|
||||
The usage with standalone converter jar provided in
|
||||
[Download](https://github.com/async-profiler/async-profiler/?tab=readme-ov-file#Download)
|
||||
section is very similar to `jfrconv`.
|
||||
Standalone converter jar is provided in
|
||||
[Download](https://github.com/async-profiler/async-profiler/?tab=readme-ov-file#Download). It accepts the same parameters as `jfrconv`.
|
||||
|
||||
Below is an example usage:
|
||||
|
||||
`java -cp /path/to/standalone-converter-jar --cpu foo.jfr --reverse --title Application CPU profile`
|
||||
|
||||
The only difference lies in how the binary is used.
|
||||
```
|
||||
java -cp /path/to/standalone-converter-jar --cpu foo.jfr --reverse --title "Application CPU profile"
|
||||
```
|
||||
|
||||
@@ -28,12 +28,13 @@ The below options are `action`s for async-profiler and common for both `asprof`
|
||||
| asprof | Launch as agent | Description |
|
||||
| ------------------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
|
||||
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>`` |
|
||||
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
|
||||
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Special Event Types](https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilingModes.md#special-event-types-supported-on-linux) for additional information. |
|
||||
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events.<br>Example: `asprof -e cpu -i 500us 8983` |
|
||||
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br/>Example: `asprof -d 30 <pid>` |
|
||||
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br/>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
|
||||
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br/>Please refer to [Special Event Types](https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilingModes.md#special-event-types-supported-on-linux) for additional information. |
|
||||
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events.<br/>Example: `asprof -e cpu -i 500us 8983` |
|
||||
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
|
||||
| `--live` | `live` | Retain allocation samples with live objects only (objects that have not been collected by the end of the profiling session). Useful for finding Java heap memory leaks. |
|
||||
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified, is the interval in bytes, or in other units if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
|
||||
| `--lock DURATION` | `lock=DURATION` | In lock profiling mode, sample contended locks when total lock duration overflows the threshold |
|
||||
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983` |
|
||||
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |
|
||||
|
||||
@@ -89,6 +89,35 @@ $ gdb $JAVA_HOME/lib/server/libjvm.so -ex 'info address UseG1GC'
|
||||
This command's output will either contain `Symbol "UseG1GC" is at 0xxxxx`
|
||||
or `No symbol "UseG1GC" in current context`.
|
||||
|
||||
## Native memory leaks
|
||||
|
||||
The profiling mode `nativemem` records `malloc`, `realloc`, `calloc` and `free` calls with the addresses, so that allocations can be matched with frees. This helps to focus the profile report only on unfreed allocations, which are likely to be the source of a memory leak.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
asprof start -e nativemem -f app.jfr <YourApp>
|
||||
# asprof start --nativemem N -f app.jfr <YourApp>
|
||||
|
||||
asprof stop <YourApp>
|
||||
```
|
||||
|
||||
Now we need to process the jfr file, to find native memory leaks:
|
||||
|
||||
```
|
||||
# --total for bytes, default counts invocations.
|
||||
jfrconv --total --nativemem --leak app.jfr app-leak.html
|
||||
|
||||
# No leak analysis, include all native allocations:
|
||||
jfrconv --total --nativemem app.jfr app-malloc.html
|
||||
```
|
||||
|
||||
When `--leak` option is used, the generated flame graph will show allocations without matching `free` calls:
|
||||
|
||||

|
||||
|
||||
The overhead of `nativemem` profiling depends on the number of native allocations, but is usually small enough even for production use. If required, the overhead can be reduced by configuring the profiling interval. E.g. if you add `nativemem=1m` profiler option, allocation samples will be limited to at most one sample per allocated megabyte.
|
||||
|
||||
## Wall-clock profiling
|
||||
|
||||
`-e wall` option tells async-profiler to sample all threads equally every given
|
||||
|
||||
@@ -7,6 +7,14 @@
|
||||
#define _ARCH_H
|
||||
|
||||
|
||||
# ifndef likely
|
||||
# define likely(x) (__builtin_expect(!!(x), 1))
|
||||
# endif
|
||||
|
||||
# ifndef unlikely
|
||||
# define unlikely(x) (__builtin_expect(!!(x), 0))
|
||||
# endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
typedef unsigned short u16;
|
||||
typedef unsigned int u32;
|
||||
|
||||
@@ -215,6 +215,8 @@ Error Arguments::parse(const char* args) {
|
||||
msg = "event must not be empty";
|
||||
} else if (strcmp(value, EVENT_ALLOC) == 0) {
|
||||
if (_alloc < 0) _alloc = 0;
|
||||
} else if (strcmp(value, EVENT_NATIVEMEM) == 0) {
|
||||
if (_nativemem < 0) _nativemem = 0;
|
||||
} else if (strcmp(value, EVENT_LOCK) == 0) {
|
||||
if (_lock < 0) _lock = DEFAULT_LOCK_INTERVAL;
|
||||
} else if (_event != NULL) {
|
||||
@@ -237,6 +239,9 @@ Error Arguments::parse(const char* args) {
|
||||
CASE("alloc")
|
||||
_alloc = value == NULL ? 0 : parseUnits(value, BYTES);
|
||||
|
||||
CASE("nativemem")
|
||||
_nativemem = value == NULL ? 0 : parseUnits(value, BYTES);
|
||||
|
||||
CASE("lock")
|
||||
_lock = value == NULL ? 0 : parseUnits(value, NANOS);
|
||||
|
||||
@@ -420,7 +425,7 @@ Error Arguments::parse(const char* args) {
|
||||
return Error(msg);
|
||||
}
|
||||
|
||||
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0) {
|
||||
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0 && _nativemem < 0) {
|
||||
_event = EVENT_CPU;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ const int DEFAULT_JSTACKDEPTH = 2048;
|
||||
|
||||
const char* const EVENT_CPU = "cpu";
|
||||
const char* const EVENT_ALLOC = "alloc";
|
||||
const char* const EVENT_NATIVEMEM = "nativemem";
|
||||
const char* const EVENT_LOCK = "lock";
|
||||
const char* const EVENT_WALL = "wall";
|
||||
const char* const EVENT_CTIMER = "ctimer";
|
||||
@@ -161,6 +162,7 @@ class Arguments {
|
||||
int _timeout;
|
||||
long _interval;
|
||||
long _alloc;
|
||||
long _nativemem;
|
||||
long _lock;
|
||||
long _wall;
|
||||
int _jstackdepth;
|
||||
@@ -212,6 +214,7 @@ class Arguments {
|
||||
_timeout(0),
|
||||
_interval(0),
|
||||
_alloc(-1),
|
||||
_nativemem(-1),
|
||||
_lock(-1),
|
||||
_wall(-1),
|
||||
_jstackdepth(DEFAULT_JSTACKDEPTH),
|
||||
|
||||
@@ -107,20 +107,6 @@ void CodeCache::sort() {
|
||||
if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end;
|
||||
}
|
||||
|
||||
void CodeCache::mark(NamePredicate predicate, char value) {
|
||||
for (int i = 0; i < _count; i++) {
|
||||
const char* blob_name = _blobs[i]._name;
|
||||
if (blob_name != NULL && predicate(blob_name)) {
|
||||
NativeFunc::mark(blob_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
if (value == MARK_VM_RUNTIME && _name != NULL) {
|
||||
// In case a library has no debug symbols
|
||||
NativeFunc::mark(_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
CodeBlob* CodeCache::findBlob(const char* name) {
|
||||
for (int i = 0; i < _count; i++) {
|
||||
const char* blob_name = _blobs[i]._name;
|
||||
@@ -184,11 +170,26 @@ const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) {
|
||||
|
||||
void CodeCache::addImport(void** entry, const char* name) {
|
||||
switch (name[0]) {
|
||||
case 'c':
|
||||
if (strcmp(name, "calloc") == 0) {
|
||||
_imports[im_calloc] = entry;
|
||||
}
|
||||
break;
|
||||
case 'd':
|
||||
if (strcmp(name, "dlopen") == 0) {
|
||||
_imports[im_dlopen] = entry;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
if (strcmp(name, "free") == 0) {
|
||||
_imports[im_free] = entry;
|
||||
}
|
||||
break;
|
||||
case 'm':
|
||||
if (strcmp(name, "malloc") == 0) {
|
||||
_imports[im_malloc] = entry;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
if (strcmp(name, "pthread_create") == 0) {
|
||||
_imports[im_pthread_create] = entry;
|
||||
@@ -200,6 +201,11 @@ void CodeCache::addImport(void** entry, const char* name) {
|
||||
_imports[im_poll] = entry;
|
||||
}
|
||||
break;
|
||||
case 'r':
|
||||
if (strcmp(name, "realloc") == 0) {
|
||||
_imports[im_realloc] = entry;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
#define NO_MIN_ADDRESS ((const void*)-1)
|
||||
#define NO_MAX_ADDRESS ((const void*)0)
|
||||
|
||||
typedef bool (*NamePredicate)(const char* name);
|
||||
|
||||
const int INITIAL_CODE_CACHE_CAPACITY = 1000;
|
||||
const int MAX_NATIVE_LIBS = 2048;
|
||||
|
||||
@@ -24,13 +22,18 @@ enum ImportId {
|
||||
im_pthread_exit,
|
||||
im_pthread_setspecific,
|
||||
im_poll,
|
||||
im_malloc,
|
||||
im_calloc,
|
||||
im_realloc,
|
||||
im_free,
|
||||
NUM_IMPORTS
|
||||
};
|
||||
|
||||
enum Mark {
|
||||
MARK_VM_RUNTIME = 1,
|
||||
MARK_INTERPRETER = 2,
|
||||
MARK_COMPILER_ENTRY = 3
|
||||
MARK_COMPILER_ENTRY = 3,
|
||||
MARK_ASYNC_PROFILER = 4, // async-profiler internals such as native hooks.
|
||||
};
|
||||
|
||||
|
||||
@@ -159,7 +162,21 @@ class CodeCache {
|
||||
void add(const void* start, int length, const char* name, bool update_bounds = false);
|
||||
void updateBounds(const void* start, const void* end);
|
||||
void sort();
|
||||
void mark(NamePredicate predicate, char value);
|
||||
|
||||
// Marks native functions of this code cache with the given mark value.
//   predicate - callable invoked with each non-NULL blob name; the blob is
//               marked when the call evaluates to true
//   value     - mark passed through to NativeFunc::mark
template <typename NamePredicate>
|
||||
inline void mark(NamePredicate predicate, char value) {
|
||||
for (int i = 0; i < _count; i++) {
|
||||
const char* blob_name = _blobs[i]._name;
|
||||
// Blobs without a name are skipped before the predicate is consulted
if (blob_name != NULL && predicate(blob_name)) {
|
||||
NativeFunc::mark(blob_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
// For MARK_VM_RUNTIME only, the cache's own _name is marked as well
if (value == MARK_VM_RUNTIME && _name != NULL) {
|
||||
// In case a library has no debug symbols
|
||||
NativeFunc::mark(_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
void addImport(void** entry, const char* name);
|
||||
void** findImport(ImportId id);
|
||||
|
||||
@@ -97,6 +97,8 @@ public class Main {
|
||||
" --wall Wall clock profile\n" +
|
||||
" --alloc Allocation profile\n" +
|
||||
" --live Live object profile\n" +
|
||||
" --nativemem malloc profile\n" +
|
||||
" --leak Only include memory leaks in nativemem\n" +
|
||||
" --lock Lock contention profile\n" +
|
||||
" -t --threads Split stack traces by threads\n" +
|
||||
" -s --state LIST Filter thread states: runnable, sleeping\n" +
|
||||
|
||||
@@ -25,6 +25,8 @@ public class Arguments {
|
||||
public boolean cpu;
|
||||
public boolean wall;
|
||||
public boolean alloc;
|
||||
public boolean nativemem;
|
||||
public boolean leak;
|
||||
public boolean live;
|
||||
public boolean lock;
|
||||
public boolean threads;
|
||||
|
||||
@@ -21,11 +21,17 @@ import static one.convert.Frame.*;
|
||||
public abstract class JfrConverter extends Classifier {
|
||||
protected final JfrReader jfr;
|
||||
protected final Arguments args;
|
||||
protected final IEventAggregator eventAggregator;
|
||||
protected Dictionary<String> methodNames;
|
||||
|
||||
public JfrConverter(JfrReader jfr, Arguments args) {
|
||||
boolean leakDetection = args.nativemem && args.leak;
|
||||
|
||||
this.jfr = jfr;
|
||||
this.args = args;
|
||||
|
||||
IEventAggregator agg = new EventAggregator(args.threads, args.total);
|
||||
this.eventAggregator = leakDetection ? new MallocLeakAggregator(agg) : agg;
|
||||
}
|
||||
|
||||
public void convert() throws IOException {
|
||||
@@ -33,19 +39,35 @@ public abstract class JfrConverter extends Classifier {
|
||||
while (jfr.hasMoreChunks()) {
|
||||
// Reset method dictionary, since new chunk may have different IDs
|
||||
methodNames = new Dictionary<>();
|
||||
collectEvents();
|
||||
|
||||
eventAggregator.setFactor(args.lock ? 1e9 / jfr.ticksPerSec : 1.0);
|
||||
eventAggregator.finishChunk();
|
||||
if (args.grain > 0) {
|
||||
eventAggregator.coarsen(args.grain);
|
||||
}
|
||||
|
||||
convertChunk();
|
||||
}
|
||||
eventAggregator.resetChunk();
|
||||
}
|
||||
|
||||
protected abstract void convertChunk() throws IOException;
|
||||
finalizeAggregator();
|
||||
}
|
||||
|
||||
protected EventAggregator collectEvents() throws IOException {
|
||||
EventAggregator agg = new EventAggregator(args.threads, args.total, args.lock ? 1e9 / jfr.ticksPerSec : 1.0);
|
||||
private void finalizeAggregator()throws IOException {
|
||||
eventAggregator.finish();
|
||||
convertChunk();
|
||||
eventAggregator.resetChunk();
|
||||
}
|
||||
|
||||
Class<? extends Event> eventClass =
|
||||
args.live ? LiveObject.class :
|
||||
args.alloc ? AllocationSample.class :
|
||||
args.lock ? ContendedLock.class : ExecutionSample.class;
|
||||
protected abstract void convertChunk();
|
||||
|
||||
protected void collectEvents() throws IOException {
|
||||
Class<? extends Event> eventClass = args.live ? LiveObject.class
|
||||
: args.alloc ? AllocationSample.class
|
||||
: args.lock ? ContendedLock.class
|
||||
: args.nativemem ? MallocEvent.class
|
||||
: ExecutionSample.class;
|
||||
|
||||
BitSet threadStates = null;
|
||||
if (args.state != null) {
|
||||
@@ -65,16 +87,10 @@ public abstract class JfrConverter extends Classifier {
|
||||
for (Event event; (event = jfr.readEvent(eventClass)) != null;) {
|
||||
if (event.time >= startTicks && event.time <= endTicks) {
|
||||
if (threadStates == null || threadStates.get(((ExecutionSample) event).threadState)) {
|
||||
agg.collect(event);
|
||||
eventAggregator.collect(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (args.grain > 0) {
|
||||
agg.coarsen(args.grain);
|
||||
}
|
||||
|
||||
return agg;
|
||||
}
|
||||
|
||||
protected int toThreadState(String name) {
|
||||
@@ -163,8 +179,8 @@ public abstract class JfrConverter extends Classifier {
|
||||
|
||||
protected String getThreadName(int tid) {
|
||||
String threadName = jfr.threads.get(tid);
|
||||
return threadName == null ? "[tid=" + tid + ']' :
|
||||
threadName.startsWith("[tid=") ? threadName : '[' + threadName + " tid=" + tid + ']';
|
||||
return threadName == null ? "[tid=" + tid + ']'
|
||||
: threadName.startsWith("[tid=") ? threadName : '[' + threadName + " tid=" + tid + ']';
|
||||
}
|
||||
|
||||
protected String toJavaClassName(byte[] symbol, int start, boolean dotted) {
|
||||
|
||||
@@ -9,7 +9,7 @@ import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.AllocationSample;
|
||||
import one.jfr.event.Event;
|
||||
import one.jfr.event.EventAggregator;
|
||||
import one.jfr.event.IEventAggregator;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
@@ -30,8 +30,8 @@ public class JfrToFlame extends JfrConverter {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void convertChunk() throws IOException {
|
||||
collectEvents().forEach(new EventAggregator.ValueVisitor() {
|
||||
protected void convertChunk() {
|
||||
eventAggregator.forEach(new IEventAggregator.ValueVisitor() {
|
||||
final CallStack stack = new CallStack();
|
||||
|
||||
@Override
|
||||
|
||||
@@ -8,7 +8,7 @@ package one.convert;
|
||||
import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.Event;
|
||||
import one.jfr.event.EventAggregator;
|
||||
import one.jfr.event.IEventAggregator;
|
||||
import one.proto.Proto;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
@@ -29,7 +29,9 @@ public class JfrToPprof extends JfrConverter {
|
||||
super(jfr, args);
|
||||
|
||||
Proto sampleType;
|
||||
if (args.alloc || args.live) {
|
||||
if (args.nativemem) {
|
||||
sampleType = valueType("malloc", args.total ? "bytes" : "count");
|
||||
} else if (args.alloc || args.live) {
|
||||
sampleType = valueType("allocations", args.total ? "bytes" : "count");
|
||||
} else if (args.lock) {
|
||||
sampleType = valueType("locks", args.total ? "nanoseconds" : "count");
|
||||
@@ -42,8 +44,8 @@ public class JfrToPprof extends JfrConverter {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void convertChunk() throws IOException {
|
||||
collectEvents().forEach(new EventAggregator.ValueVisitor() {
|
||||
protected void convertChunk() {
|
||||
eventAggregator.forEach(new IEventAggregator.ValueVisitor() {
|
||||
final Proto s = new Proto(100);
|
||||
|
||||
@Override
|
||||
|
||||
@@ -73,6 +73,8 @@ public class JfrReader implements Closeable {
|
||||
private int monitorEnter;
|
||||
private int threadPark;
|
||||
private int activeSetting;
|
||||
private int malloc;
|
||||
private int free;
|
||||
|
||||
public JfrReader(String fileName) throws IOException {
|
||||
this.ch = FileChannel.open(Paths.get(fileName), StandardOpenOption.READ);
|
||||
@@ -174,6 +176,10 @@ public class JfrReader implements Closeable {
|
||||
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(true);
|
||||
} else if (type == allocationOutsideTLAB || type == allocationSample) {
|
||||
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(false);
|
||||
} else if (type == malloc) {
|
||||
if (cls == null || cls == MallocEvent.class) return (E) readMallocEvent(true);
|
||||
} else if (type == free) {
|
||||
if (cls == null || cls == MallocEvent.class) return (E) readMallocEvent(false);
|
||||
} else if (type == liveObject) {
|
||||
if (cls == null || cls == LiveObject.class) return (E) readLiveObject();
|
||||
} else if (type == monitorEnter) {
|
||||
@@ -221,6 +227,15 @@ public class JfrReader implements Closeable {
|
||||
return new AllocationSample(time, tid, stackTraceId, classId, allocationSize, tlabSize);
|
||||
}
|
||||
|
||||
/**
 * Decodes a native memory event at the current read position.
 * Fields are read in this order: time, thread id, stack trace id, address
 * and, only when {@code hasSize} is true, the size.
 *
 * @param hasSize whether the record carries a size field; readEvent passes
 *                true for "profiler.Malloc" and false for "profiler.Free"
 * @return the decoded event; size is 0 when {@code hasSize} is false
 */
private MallocEvent readMallocEvent(boolean hasSize) {
|
||||
long time = getVarlong();
|
||||
int tid = getVarint();
|
||||
int stackTraceId = getVarint();
|
||||
long address = getVarlong();
|
||||
// No size field is consumed from the stream when hasSize is false
long size = hasSize ? getVarlong() : 0;
|
||||
return new MallocEvent(time, tid, stackTraceId, address, size);
|
||||
}
|
||||
|
||||
private LiveObject readLiveObject() {
|
||||
long time = getVarlong();
|
||||
int tid = getVarint();
|
||||
@@ -540,6 +555,8 @@ public class JfrReader implements Closeable {
|
||||
monitorEnter = getTypeId("jdk.JavaMonitorEnter");
|
||||
threadPark = getTypeId("jdk.ThreadPark");
|
||||
activeSetting = getTypeId("jdk.ActiveSetting");
|
||||
malloc = getTypeId("profiler.Malloc");
|
||||
free = getTypeId("profiler.Free");
|
||||
|
||||
registerEvent("jdk.CPULoad", CPULoad.class);
|
||||
registerEvent("jdk.GCHeapSummary", GCHeapSummary.class);
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
package one.jfr.event;
|
||||
|
||||
public class EventAggregator {
|
||||
public class EventAggregator implements IEventAggregator {
|
||||
private static final int INITIAL_CAPACITY = 1024;
|
||||
|
||||
private final boolean threads;
|
||||
private final boolean total;
|
||||
private final double factor;
|
||||
private double factor;
|
||||
private Event[] keys;
|
||||
private long[] samples;
|
||||
private long[] values;
|
||||
@@ -18,13 +18,19 @@ public class EventAggregator {
|
||||
private double fraction;
|
||||
|
||||
public EventAggregator(boolean threads, boolean total) {
|
||||
this(threads, total, 1.0);
|
||||
}
|
||||
|
||||
public EventAggregator(boolean threads, boolean total, double factor) {
|
||||
this.threads = threads;
|
||||
this.total = total;
|
||||
|
||||
this.resetChunk();
|
||||
}
|
||||
|
||||
public void setFactor(double factor) {
|
||||
this.factor = factor;
|
||||
}
|
||||
|
||||
public void resetChunk() {
|
||||
this.size = 0;
|
||||
this.factor = 1;
|
||||
this.keys = new Event[INITIAL_CAPACITY];
|
||||
this.samples = new long[INITIAL_CAPACITY];
|
||||
this.values = new long[INITIAL_CAPACITY];
|
||||
@@ -59,7 +65,15 @@ public class EventAggregator {
|
||||
}
|
||||
}
|
||||
|
||||
public void forEach(Visitor visitor) {
|
||||
public void finish() {
|
||||
// EventAggregator does not need finishing.
|
||||
}
|
||||
|
||||
public void finishChunk() {
|
||||
// EventAggregator does not need finishing.
|
||||
}
|
||||
|
||||
public void forEach(IEventAggregator.Visitor visitor) {
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
if (keys[i] != null) {
|
||||
visitor.visit(keys[i], samples[i], values[i]);
|
||||
@@ -67,7 +81,7 @@ public class EventAggregator {
|
||||
}
|
||||
}
|
||||
|
||||
public void forEach(ValueVisitor visitor) {
|
||||
public void forEach(IEventAggregator.ValueVisitor visitor) {
|
||||
double factor = total ? this.factor : 0.0;
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
if (keys[i] != null) {
|
||||
@@ -77,6 +91,8 @@ public class EventAggregator {
|
||||
}
|
||||
|
||||
public void coarsen(double grain) {
|
||||
this.fraction = 0;
|
||||
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
if (keys[i] != null) {
|
||||
long s0 = samples[i];
|
||||
@@ -131,12 +147,4 @@ public class EventAggregator {
|
||||
samples = newSamples;
|
||||
values = newValues;
|
||||
}
|
||||
|
||||
public interface Visitor {
|
||||
void visit(Event event, long samples, long value);
|
||||
}
|
||||
|
||||
public interface ValueVisitor {
|
||||
void visit(Event event, long value);
|
||||
}
|
||||
}
|
||||
|
||||
32
src/converter/one/jfr/event/IEventAggregator.java
Normal file
32
src/converter/one/jfr/event/IEventAggregator.java
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package one.jfr.event;
|
||||
|
||||
/**
 * Common contract for event aggregators used by the JFR converters.
 * Implemented by EventAggregator and by MallocLeakAggregator, which wraps
 * another IEventAggregator.
 */
public interface IEventAggregator {
|
||||
// Feeds one event into the aggregation.
void collect(Event e);
|
||||
|
||||
// Hook for per-chunk finishing; EventAggregator implements it as a no-op.
void finishChunk();
|
||||
|
||||
// Resets per-chunk state; EventAggregator clears its size, factor and tables here.
void resetChunk();
|
||||
|
||||
// Hook for final finishing; EventAggregator implements it as a no-op.
void finish();
|
||||
|
||||
// Sets the value scale factor. JfrConverter passes 1e9 / ticksPerSec for
// lock profiles and 1.0 otherwise.
void setFactor(double factor);
|
||||
|
||||
// Coarsens aggregated values by the given grain; JfrConverter calls this
// only when args.grain > 0.
void coarsen(double grain);
|
||||
|
||||
// Visits aggregated entries, reporting both sample count and value.
void forEach(Visitor visitor);
|
||||
|
||||
// Visits aggregated entries, reporting a single value per entry.
void forEach(ValueVisitor visitor);
|
||||
|
||||
// Callback receiving an event key with its sample count and value.
public interface Visitor {
|
||||
void visit(Event event, long samples, long value);
|
||||
}
|
||||
|
||||
// Callback receiving an event key with a single value.
public interface ValueVisitor {
|
||||
void visit(Event event, long value);
|
||||
}
|
||||
}
|
||||
22
src/converter/one/jfr/event/MallocEvent.java
Normal file
22
src/converter/one/jfr/event/MallocEvent.java
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package one.jfr.event;
|
||||
|
||||
public class MallocEvent extends Event {
|
||||
public final long address;
|
||||
public final long size;
|
||||
|
||||
public MallocEvent(long time, int tid, int stackTraceId, long address, long size) {
|
||||
super(time, tid, stackTraceId);
|
||||
this.address = address;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long value() {
|
||||
return size;
|
||||
}
|
||||
}
|
||||
85
src/converter/one/jfr/event/MallocLeakAggregator.java
Normal file
85
src/converter/one/jfr/event/MallocLeakAggregator.java
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package one.jfr.event;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class MallocLeakAggregator implements IEventAggregator {
|
||||
private final IEventAggregator wrapped;
|
||||
private List<Event> events;
|
||||
private double grain = 0;
|
||||
private double factor = 1;
|
||||
|
||||
Map<Long, MallocEvent> addresses = new HashMap<>();
|
||||
|
||||
public MallocLeakAggregator(IEventAggregator wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
this.events = new ArrayList<Event>();
|
||||
}
|
||||
|
||||
private List<Event> filter(List<Event> events) {
|
||||
for (Event event : events) {
|
||||
if (!(event instanceof MallocEvent)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MallocEvent e = (MallocEvent) event;
|
||||
if (e.size > 0) {
|
||||
addresses.put(e.address, e);
|
||||
} else {
|
||||
addresses.remove(e.address);
|
||||
}
|
||||
}
|
||||
|
||||
return new ArrayList<>(addresses.values());
|
||||
}
|
||||
|
||||
public void collect(Event e) {
|
||||
events.add(e);
|
||||
}
|
||||
|
||||
public void finishChunk() {
|
||||
Collections.sort(events);
|
||||
events = filter(events);
|
||||
}
|
||||
|
||||
public void finish() {
|
||||
wrapped.setFactor(this.factor);
|
||||
|
||||
if (grain > 0) {
|
||||
wrapped.coarsen(grain);
|
||||
}
|
||||
|
||||
for (Event e : events) {
|
||||
wrapped.collect(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void coarsen(double grain) {
|
||||
// Delay coarsening until the final chunk is processed.
|
||||
this.grain = grain;
|
||||
}
|
||||
|
||||
public void setFactor(double factor) {
|
||||
this.factor = factor;
|
||||
}
|
||||
|
||||
public void resetChunk() {
|
||||
wrapped.resetChunk();
|
||||
}
|
||||
|
||||
public void forEach(IEventAggregator.Visitor visitor) {
|
||||
wrapped.forEach(visitor);
|
||||
}
|
||||
|
||||
public void forEach(IEventAggregator.ValueVisitor visitor) {
|
||||
wrapped.forEach(visitor);
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ enum EventType {
|
||||
EXECUTION_SAMPLE,
|
||||
WALL_CLOCK_SAMPLE,
|
||||
INSTRUMENTED_METHOD,
|
||||
MALLOC_SAMPLE,
|
||||
ALLOC_SAMPLE,
|
||||
ALLOC_OUTSIDE_TLAB,
|
||||
LIVE_OBJECT,
|
||||
@@ -75,4 +76,11 @@ class ProfilingWindow : public Event {
|
||||
u64 _end_time;
|
||||
};
|
||||
|
||||
class MallocEvent : public Event {
|
||||
public:
|
||||
u64 _start_time;
|
||||
uintptr_t _address;
|
||||
u64 _size;
|
||||
};
|
||||
|
||||
#endif // _EVENT_H
|
||||
|
||||
@@ -1224,6 +1224,19 @@ class Recording {
|
||||
buf->put8(start, buf->offset() - start);
|
||||
}
|
||||
|
||||
// Serializes a native memory event. A non-zero size produces a T_MALLOC
// record (with a trailing size field); a zero size produces a T_FREE record
// without the size field.
void recordMallocSample(Buffer* buf, int tid, u32 call_trace_id, MallocEvent* event) {
    const bool is_allocation = event->_size != 0;

    // Reserve one byte for the record length, filled in at the end.
    int record_start = buf->skip(1);
    buf->put8(is_allocation ? T_MALLOC : T_FREE);
    buf->putVar64(event->_start_time);
    buf->putVar32(tid);
    buf->putVar32(call_trace_id);
    buf->putVar64(event->_address);
    if (is_allocation) {
        buf->putVar64(event->_size);
    }
    buf->put8(record_start, buf->offset() - record_start);
}
|
||||
|
||||
void recordLiveObject(Buffer* buf, int tid, u32 call_trace_id, LiveObject* event) {
|
||||
int start = buf->skip(1);
|
||||
buf->put8(T_LIVE_OBJECT);
|
||||
@@ -1477,6 +1490,9 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
|
||||
case WALL_CLOCK_SAMPLE:
|
||||
_rec->recordWallClockSample(buf, tid, call_trace_id, (WallClockEvent*)event);
|
||||
break;
|
||||
case MALLOC_SAMPLE:
|
||||
_rec->recordMallocSample(buf, tid, call_trace_id, (MallocEvent*)event);
|
||||
break;
|
||||
case ALLOC_SAMPLE:
|
||||
_rec->recordAllocationInNewTLAB(buf, tid, call_trace_id, (AllocEvent*)event);
|
||||
break;
|
||||
|
||||
@@ -229,6 +229,21 @@ JfrMetadata::JfrMetadata() : Element("root") {
|
||||
<< field("state", T_THREAD_STATE, "Thread State", F_CPOOL)
|
||||
<< field("samples", T_INT, "Samples", F_UNSIGNED))
|
||||
|
||||
<< (type("profiler.Malloc", T_MALLOC, "malloc")
|
||||
<< category("Java Virtual Machine", "Native Memory")
|
||||
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
|
||||
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
|
||||
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
|
||||
<< field("address", T_LONG, "Address", F_ADDRESS)
|
||||
<< field("size", T_LONG, "Size", F_BYTES))
|
||||
|
||||
<< (type("profiler.Free", T_FREE, "free")
|
||||
<< category("Java Virtual Machine", "Native Memory")
|
||||
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
|
||||
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
|
||||
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
|
||||
<< field("address", T_LONG, "Address", F_ADDRESS))
|
||||
|
||||
<< (type("jdk.jfr.Label", T_LABEL, NULL)
|
||||
<< field("value", T_STRING))
|
||||
|
||||
|
||||
@@ -60,6 +60,8 @@ enum JfrType {
|
||||
T_WINDOW = 116,
|
||||
T_LIVE_OBJECT = 117,
|
||||
T_WALL_CLOCK_SAMPLE = 118,
|
||||
T_MALLOC = 119,
|
||||
T_FREE = 120,
|
||||
|
||||
T_ANNOTATION = 200,
|
||||
T_LABEL = 201,
|
||||
|
||||
@@ -44,7 +44,7 @@ static const char USAGE_STRING[] =
|
||||
" collect collect profile for the specified period of time\n"
|
||||
" and then stop (default action)\n"
|
||||
"Options:\n"
|
||||
" -e event profiling event: cpu|alloc|lock|cache-misses etc.\n"
|
||||
" -e event profiling event: cpu|alloc|nativemem|lock|cache-misses etc.\n"
|
||||
" -d duration run profiling for <duration> seconds\n"
|
||||
" -f filename dump output to <filename>\n"
|
||||
" -i interval sampling interval in nanoseconds\n"
|
||||
@@ -69,6 +69,7 @@ static const char USAGE_STRING[] =
|
||||
" --loop time run profiler in a loop\n"
|
||||
" --alloc bytes allocation profiling interval in bytes\n"
|
||||
" --live build allocation profile from live objects only\n"
|
||||
" --nativemem bytes native allocation profiling interval in bytes\n"
|
||||
" --lock duration lock profiling threshold in nanoseconds\n"
|
||||
" --wall interval wall clock profiling interval\n"
|
||||
" --total accumulate the total value (time, bytes, etc.)\n"
|
||||
@@ -491,7 +492,7 @@ int main(int argc, const char** argv) {
|
||||
} else if (arg == "--reverse" || arg == "--samples" || arg == "--total" || arg == "--sched" || arg == "--live") {
|
||||
format << "," << (arg.str() + 2);
|
||||
|
||||
} else if (arg == "--alloc" || arg == "--lock" || arg == "--wall" ||
|
||||
} else if (arg == "--alloc" || arg == "--nativemem" || arg == "--lock" || arg == "--wall" ||
|
||||
arg == "--chunksize" || arg == "--chunktime" ||
|
||||
arg == "--cstack" || arg == "--signal" || arg == "--clock" || arg == "--begin" || arg == "--end") {
|
||||
params << "," << (arg.str() + 2) << "=" << args.next();
|
||||
|
||||
233
src/mallocTracer.cpp
Normal file
233
src/mallocTracer.cpp
Normal file
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "asprof.h"
|
||||
#include "assert.h"
|
||||
#include "codeCache.h"
|
||||
#include "mallocTracer.h"
|
||||
#include "os.h"
|
||||
#include "profiler.h"
|
||||
#include "tsc.h"
|
||||
#include <dlfcn.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ADDRESS_OF(sym) ({ \
|
||||
void* addr = dlsym(RTLD_NEXT, #sym); \
|
||||
addr != NULL ? (sym##_t)addr : sym; \
|
||||
})
|
||||
|
||||
typedef void* (*malloc_t)(size_t);
|
||||
static malloc_t _orig_malloc = NULL;
|
||||
|
||||
typedef void* (*calloc_t)(size_t, size_t);
|
||||
static calloc_t _orig_calloc = NULL;
|
||||
|
||||
typedef void* (*realloc_t)(void*, size_t);
|
||||
static realloc_t _orig_realloc = NULL;
|
||||
|
||||
typedef void (*free_t)(void*);
|
||||
static free_t _orig_free = NULL;
|
||||
|
||||
static void* malloc_hook(size_t size) {
|
||||
void* ret = _orig_malloc(size);
|
||||
if (likely(ret && size)) {
|
||||
MallocTracer::recordMalloc(ret, size);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
extern "C" WEAK DLLEXPORT void* malloc(size_t size) {
|
||||
if (likely(MallocTracer::initialized())) {
|
||||
return malloc_hook(size);
|
||||
}
|
||||
|
||||
if (unlikely(!_orig_malloc)) {
|
||||
return NULL;
|
||||
}
|
||||
return _orig_malloc(size);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void* calloc_hook(size_t num, size_t size) {
|
||||
void* ret = _orig_calloc(num, size);
|
||||
if (likely(ret && num && size)) {
|
||||
MallocTracer::recordMalloc(ret, num * size);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
extern "C" WEAK DLLEXPORT void* calloc(size_t num, size_t size) {
|
||||
if (likely(MallocTracer::initialized())) {
|
||||
return calloc_hook(num, size);
|
||||
}
|
||||
if (unlikely(!_orig_calloc)) {
|
||||
return NULL;
|
||||
}
|
||||
return _orig_calloc(num, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void* realloc_hook(void* addr, size_t size) {
|
||||
void* ret = _orig_realloc(addr, size);
|
||||
if (likely(ret && addr)) {
|
||||
MallocTracer::recordFree(addr);
|
||||
}
|
||||
|
||||
if (likely(ret && size)) {
|
||||
MallocTracer::recordMalloc(ret, size);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
extern "C" WEAK DLLEXPORT void* realloc(void* addr, size_t size) {
|
||||
if (likely(MallocTracer::initialized())) {
|
||||
return realloc_hook(addr, size);
|
||||
}
|
||||
if (unlikely(!_orig_realloc)) {
|
||||
return NULL;
|
||||
}
|
||||
return _orig_realloc(addr, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void free_hook(void* addr) {
|
||||
_orig_free(addr);
|
||||
if (addr) {
|
||||
MallocTracer::recordFree(addr);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
extern "C" WEAK DLLEXPORT void free(void* addr) {
|
||||
if (likely(MallocTracer::initialized())) {
|
||||
return free_hook(addr);
|
||||
}
|
||||
if (unlikely(!_orig_free)) {
|
||||
return;
|
||||
}
|
||||
return _orig_free(addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
u64 MallocTracer::_interval;
|
||||
volatile u64 MallocTracer::_allocated_bytes;
|
||||
|
||||
Mutex MallocTracer::_patch_lock;
|
||||
int MallocTracer::_patched_libs = 0;
|
||||
bool MallocTracer::_initialized = false;
|
||||
|
||||
// Library constructor: resolves the original allocator entry points through
// ADDRESS_OF. This runs whether or not MallocTracer is ever enabled, because
// the weak malloc/calloc/realloc/free exports forward to these pointers.
__attribute__((constructor)) static void getOrigAddresses() {
    _orig_free = ADDRESS_OF(free);
    _orig_realloc = ADDRESS_OF(realloc);
    _orig_calloc = ADDRESS_OF(calloc);
    _orig_malloc = ADDRESS_OF(malloc);
}
|
||||
|
||||
bool MallocTracer::initialize() {
|
||||
if (!__sync_bool_compare_and_swap(&_initialized, false, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CodeCache* lib = Profiler::instance()->findLibraryByAddress((void*)MallocTracer::initialize);
|
||||
assert(lib);
|
||||
|
||||
lib->mark(
|
||||
[](const char* s) -> bool {
|
||||
return strncmp(s, "_ZL11malloc_hook", 16) == 0
|
||||
|| strncmp(s, "_ZL11calloc_hook", 16) == 0
|
||||
|| strncmp(s, "_ZL12realloc_hook", 17) == 0
|
||||
|| strncmp(s, "_ZL9free_hook", 13) == 0;
|
||||
},
|
||||
MARK_ASYNC_PROFILER);
|
||||
|
||||
return installHooks();
|
||||
}
|
||||
|
||||
bool MallocTracer::patchLibs(bool install) {
|
||||
if (!initialized()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MutexLocker ml(_patch_lock);
|
||||
if (!install) {
|
||||
assert(_orig_malloc);
|
||||
assert(_orig_calloc);
|
||||
assert(_orig_realloc);
|
||||
assert(_orig_free);
|
||||
|
||||
_patched_libs = 0;
|
||||
}
|
||||
|
||||
CodeCacheArray* native_libs = Profiler::instance()->nativeLibs();
|
||||
int native_lib_count = native_libs->count();
|
||||
|
||||
while (_patched_libs < native_lib_count) {
|
||||
CodeCache* cc = (*native_libs)[_patched_libs++];
|
||||
|
||||
cc->patchImport(im_malloc, (void*)(install ? malloc_hook : _orig_malloc));
|
||||
cc->patchImport(im_calloc, (void*)(install ? calloc_hook : _orig_calloc));
|
||||
cc->patchImport(im_realloc, (void*)(install ? realloc_hook : _orig_realloc));
|
||||
cc->patchImport(im_free, (void*)(install ? free_hook : _orig_free));
|
||||
}
|
||||
|
||||
if (!install) {
|
||||
_patched_libs = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MallocTracer::recordMalloc(void* address, size_t size) {
|
||||
if (updateCounter(_allocated_bytes, size, _interval)) {
|
||||
MallocEvent event;
|
||||
event._start_time = TSC::ticks();
|
||||
event._address = (uintptr_t)address;
|
||||
event._size = size;
|
||||
|
||||
Profiler::instance()->recordSample(NULL, size, MALLOC_SAMPLE, &event);
|
||||
}
|
||||
}
|
||||
|
||||
void MallocTracer::recordFree(void* address) {
|
||||
MallocEvent event;
|
||||
event._start_time = TSC::ticks();
|
||||
event._address = (uintptr_t)address;
|
||||
event._size = 0;
|
||||
|
||||
Profiler::instance()->recordEventOnly(MALLOC_SAMPLE, &event);
|
||||
}
|
||||
|
||||
// Verifies that native memory profiling can be used: succeeds on Linux and
// returns an error everywhere else. `args` is not inspected.
Error MallocTracer::check(Arguments& args) {
    if (OS::isLinux()) {
        return Error::OK;
    }
    return Error("nativemem option is only supported on linux.");
}
|
||||
|
||||
// Starts native memory profiling: validates the platform, resets the
// sampling state and makes sure the hooks are installed.
Error MallocTracer::start(Arguments& args) {
    Error error = check(args);
    if (error) {
        return error;
    }

    // Non-positive values of the nativemem argument map to an interval of 0.
    if (args._nativemem > 0) {
        _interval = args._nativemem;
    } else {
        _interval = 0;
    }
    _allocated_bytes = 0;

    // initialize() installs the hooks only on its very first call; on a
    // restart it returns false and the hooks have to be installed here.
    const bool first_start = initialize();
    if (!first_start && initialized()) {
        installHooks();
    }

    return Error::OK;
}
|
||||
|
||||
void MallocTracer::stop() {
|
||||
patchLibs(false);
|
||||
}
|
||||
57
src/mallocTracer.h
Normal file
57
src/mallocTracer.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _MALLOCTRACER_H
|
||||
#define _MALLOCTRACER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "engine.h"
|
||||
#include "event.h"
|
||||
#include "mutex.h"
|
||||
#include "trap.h"
|
||||
|
||||
class MallocTracer : public Engine {
|
||||
private:
|
||||
static u64 _interval;
|
||||
static volatile u64 _allocated_bytes;
|
||||
|
||||
static Mutex _patch_lock;
|
||||
static int _patched_libs;
|
||||
static bool _initialized;
|
||||
|
||||
static bool initialize();
|
||||
static bool patchLibs(bool install);
|
||||
|
||||
public:
|
||||
const char* type() {
|
||||
return "malloc_tracer";
|
||||
}
|
||||
|
||||
const char* title() {
|
||||
return "Malloc/free profile";
|
||||
}
|
||||
|
||||
const char* units() {
|
||||
return "bytes";
|
||||
}
|
||||
|
||||
Error check(Arguments& args);
|
||||
Error start(Arguments& args);
|
||||
void stop();
|
||||
|
||||
inline static bool installHooks() {
|
||||
return patchLibs(true);
|
||||
}
|
||||
|
||||
inline static bool initialized() {
|
||||
return _initialized;
|
||||
}
|
||||
|
||||
static void recordMalloc(void* address, size_t size);
|
||||
static void recordFree(void* address);
|
||||
};
|
||||
|
||||
#endif // _MALLOCTRACER_H
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "perfEvents.h"
|
||||
#include "ctimer.h"
|
||||
#include "allocTracer.h"
|
||||
#include "mallocTracer.h"
|
||||
#include "lockTracer.h"
|
||||
#include "wallClock.h"
|
||||
#include "j9ObjectSampler.h"
|
||||
@@ -46,6 +47,7 @@ static SigAction orig_segvHandler = NULL;
|
||||
static Engine noop_engine;
|
||||
static PerfEvents perf_events;
|
||||
static AllocTracer alloc_tracer;
|
||||
static MallocTracer malloc_tracer;
|
||||
static LockTracer lock_tracer;
|
||||
static ObjectSampler object_sampler;
|
||||
static J9ObjectSampler j9_object_sampler;
|
||||
@@ -63,7 +65,8 @@ enum EventMask {
|
||||
EM_CPU = 1,
|
||||
EM_ALLOC = 2,
|
||||
EM_LOCK = 4,
|
||||
EM_WALL = 8
|
||||
EM_WALL = 8,
|
||||
EM_NATIVEMEM = 16,
|
||||
};
|
||||
|
||||
|
||||
@@ -89,6 +92,7 @@ static inline int hasNativeStack(EventType event_type) {
|
||||
(1 << PERF_SAMPLE) |
|
||||
(1 << EXECUTION_SAMPLE) |
|
||||
(1 << WALL_CLOCK_SAMPLE) |
|
||||
(1 << MALLOC_SAMPLE) |
|
||||
(1 << ALLOC_SAMPLE) |
|
||||
(1 << ALLOC_OUTSIDE_TLAB);
|
||||
return (1 << event_type) & events_with_native_stack;
|
||||
@@ -352,6 +356,9 @@ int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGC
|
||||
// Skip all internal frames above VM runtime entry for allocation samples
|
||||
depth = 0;
|
||||
continue;
|
||||
} else if (mark == MARK_ASYNC_PROFILER && event_type == MALLOC_SAMPLE) {
|
||||
// Skip all internal frames above the *_hook functions. Include the hook function itself.
|
||||
depth = 0;
|
||||
} else if (mark == MARK_INTERPRETER) {
|
||||
// This is C++ interpreter frame, this and later frames should be reported
|
||||
// as Java frames returned by AGCT. Terminate the scan here.
|
||||
@@ -671,6 +678,8 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
|
||||
} else {
|
||||
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
|
||||
}
|
||||
} else if (event_type == MALLOC_SAMPLE && malloc_tracer.initialized()) {
|
||||
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
|
||||
} else {
|
||||
// Lock events and instrumentation events can safely call synchronous JVM TI stack walker.
|
||||
// Skip Instrument.recordSample() method
|
||||
@@ -784,6 +793,7 @@ void* Profiler::dlopen_hook(const char* filename, int flags) {
|
||||
void* result = dlopen(filename, flags);
|
||||
if (result != NULL) {
|
||||
instance()->updateSymbols(false);
|
||||
MallocTracer::installHooks();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -1028,6 +1038,8 @@ Engine* Profiler::activeEngine() {
|
||||
return &lock_tracer;
|
||||
case EM_WALL:
|
||||
return &wall_clock;
|
||||
case EM_NATIVEMEM:
|
||||
return &malloc_tracer;
|
||||
default:
|
||||
return _engine;
|
||||
}
|
||||
@@ -1072,7 +1084,9 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
_event_mask = (args._event != NULL ? EM_CPU : 0) |
|
||||
(args._alloc >= 0 ? EM_ALLOC : 0) |
|
||||
(args._lock >= 0 ? EM_LOCK : 0) |
|
||||
(args._wall >= 0 ? EM_WALL : 0);
|
||||
(args._wall >= 0 ? EM_WALL : 0) |
|
||||
(args._nativemem >= 0 ? EM_NATIVEMEM : 0);
|
||||
|
||||
if (_event_mask == 0) {
|
||||
return Error("No profiling events specified");
|
||||
} else if ((_event_mask & (_event_mask - 1)) && args._output != OUTPUT_JFR) {
|
||||
@@ -1164,7 +1178,6 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
|
||||
switchLibraryTrap(true);
|
||||
|
||||
if (args._output == OUTPUT_JFR) {
|
||||
@@ -1200,6 +1213,12 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
goto error4;
|
||||
}
|
||||
}
|
||||
if (_event_mask & EM_NATIVEMEM) {
|
||||
error = malloc_tracer.start(args);
|
||||
if (error) {
|
||||
goto error5;
|
||||
}
|
||||
}
|
||||
|
||||
switchThreadEvents(JVMTI_ENABLE);
|
||||
|
||||
@@ -1214,6 +1233,9 @@ Error Profiler::start(Arguments& args, bool reset) {
|
||||
|
||||
return Error::OK;
|
||||
|
||||
error5:
|
||||
if (_event_mask & EM_NATIVEMEM) malloc_tracer.stop();
|
||||
|
||||
error4:
|
||||
if (_event_mask & EM_LOCK) lock_tracer.stop();
|
||||
|
||||
@@ -1246,6 +1268,7 @@ Error Profiler::stop(bool restart) {
|
||||
if (_event_mask & EM_WALL) wall_clock.stop();
|
||||
if (_event_mask & EM_LOCK) lock_tracer.stop();
|
||||
if (_event_mask & EM_ALLOC) _alloc_engine->stop();
|
||||
if (_event_mask & EM_NATIVEMEM) malloc_tracer.stop();
|
||||
|
||||
_engine->stop();
|
||||
|
||||
@@ -1289,6 +1312,9 @@ Error Profiler::check(Arguments& args) {
|
||||
_alloc_engine = selectAllocEngine(args._alloc, args._live);
|
||||
error = _alloc_engine->check(args);
|
||||
}
|
||||
if (!error && args._nativemem >= 0) {
|
||||
error = malloc_tracer.check(args);
|
||||
}
|
||||
if (!error && args._lock >= 0) {
|
||||
error = lock_tracer.check(args);
|
||||
}
|
||||
@@ -1754,6 +1780,7 @@ Error Profiler::runInternal(Arguments& args, Writer& out) {
|
||||
out << "Basic events:\n";
|
||||
out << " " << EVENT_CPU << "\n";
|
||||
out << " " << EVENT_ALLOC << "\n";
|
||||
out << " " << EVENT_NATIVEMEM << "\n";
|
||||
out << " " << EVENT_LOCK << "\n";
|
||||
out << " " << EVENT_WALL << "\n";
|
||||
out << " " << EVENT_ITIMER << "\n";
|
||||
|
||||
27
test/native/libs/jnimalloc.c
Normal file
27
test/native/libs/jnimalloc.c
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <jni.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * JNI implementation of test.nativemem.Native.malloc(int).
 * The Java method takes an int, so the native parameter must be jint;
 * declaring it as jlong reads bits the JVM never set.
 * Returns the allocated address as a jlong (0 on failure).
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_malloc(JNIEnv* env, jclass clazz, jint size) {
    void* ptr = malloc((size_t)size);
    return (jlong)(intptr_t)ptr;
}
|
||||
|
||||
/*
 * JNI implementation of test.nativemem.Native.calloc(long, int).
 * The parameter types mirror the Java declaration: num is a long (jlong),
 * size is an int (jint).
 * Returns the allocated address as a jlong (0 on failure).
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_calloc(JNIEnv* env, jclass clazz, jlong num, jint size) {
    void* ptr = calloc((size_t)num, (size_t)size);
    return (jlong)(intptr_t)ptr;
}
|
||||
|
||||
/*
 * JNI implementation of test.nativemem.Native.realloc(long, int).
 * The parameter types mirror the Java declaration: addr is a long (jlong),
 * size is an int (jint).
 * Returns the resulting address as a jlong (0 on failure).
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_realloc(JNIEnv* env, jclass clazz, jlong addr, jint size) {
    void* ptr = realloc((void*)(intptr_t)addr, (size_t)size);
    return (jlong)(intptr_t)ptr;
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_test_nativemem_Native_free(JNIEnv* env, jclass clazz, jlong addr) {
|
||||
free((void*)(intptr_t)addr);
|
||||
}
|
||||
@@ -68,6 +68,8 @@ struct TestCase {
|
||||
|
||||
#define __ASSERT_OR_CHECK_OP(isAssert, val1, op, val2) \
|
||||
{ \
|
||||
_Pragma("GCC diagnostic push"); \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\""); \
|
||||
const bool is_string = \
|
||||
std::is_same<decltype(val1), const char*>::value || std::is_same<decltype(val1), char*>::value || \
|
||||
std::is_same<decltype(val2), const char*>::value || std::is_same<decltype(val2), char*>::value; \
|
||||
@@ -96,6 +98,7 @@ struct TestCase {
|
||||
} else { \
|
||||
test_case.assertion_count++; \
|
||||
} \
|
||||
_Pragma("GCC diagnostic pop"); \
|
||||
}
|
||||
|
||||
// ASSERT stops execution after a failure.
|
||||
|
||||
@@ -221,7 +221,6 @@ public class Runner {
|
||||
|
||||
List<RunnableTest> allTests = getRunnableTests(args);
|
||||
final int testCount = allTests.size();
|
||||
|
||||
int i = 1;
|
||||
long totalTestDuration = 0;
|
||||
List<String> failedTests = new ArrayList<>();
|
||||
|
||||
@@ -26,6 +26,8 @@ public @interface Test {
|
||||
|
||||
String jvmArgs() default "";
|
||||
|
||||
String[] env() default {};
|
||||
|
||||
boolean debugNonSafepoints() default false;
|
||||
|
||||
boolean output() default false;
|
||||
|
||||
@@ -77,6 +77,14 @@ public class TestProcess implements Closeable {
|
||||
if (test.error()) {
|
||||
pb.redirectError(createTempFile(STDERR));
|
||||
}
|
||||
|
||||
for (String env : test.env()) {
|
||||
String[] keyValue = env.split("=", 2);
|
||||
if (keyValue.length == 2) {
|
||||
pb.environment().put(keyValue[0], keyValue[1]);
|
||||
}
|
||||
}
|
||||
|
||||
this.p = pb.start();
|
||||
|
||||
if (cmd.get(0).endsWith("java")) {
|
||||
@@ -118,6 +126,7 @@ public class TestProcess implements Closeable {
|
||||
cmd.add("-XX:+UnlockDiagnosticVMOptions");
|
||||
cmd.add("-XX:+DebugNonSafepoints");
|
||||
}
|
||||
cmd.add("-Djava.library.path=" + System.getProperty("java.library.path"));
|
||||
addArgs(cmd, test.jvmArgs());
|
||||
if (!test.agentArgs().isEmpty()) {
|
||||
cmd.add("-agentpath:" + profilerLibPath() + "=" +
|
||||
|
||||
47
test/test/nativemem/CallsAllNoLeak.java
Normal file
47
test/test/nativemem/CallsAllNoLeak.java
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.nativemem;
|
||||
|
||||
public class CallsAllNoLeak {
|
||||
|
||||
private static final int NUM_THREADS = 8; // Number of threads
|
||||
|
||||
private static final int MALLOC_SIZE = 1999993; // Prime size, useful in assertions.
|
||||
private static final int CALLOC_SIZE = 2000147;
|
||||
private static final int REALLOC_SIZE = 30000170;
|
||||
|
||||
private static void do_work(boolean once) {
|
||||
try {
|
||||
do {
|
||||
long addr = Native.malloc(MALLOC_SIZE);
|
||||
long reallocd = Native.realloc(addr, REALLOC_SIZE);
|
||||
Native.free(reallocd);
|
||||
|
||||
long callocd = Native.calloc(1, CALLOC_SIZE);
|
||||
Native.free(callocd);
|
||||
|
||||
Thread.sleep(1);
|
||||
} while (!once);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
System.err.println("Thread interrupted: " + Thread.currentThread().getName());
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
final boolean once = args.length > 0 && args[0].equals("once");
|
||||
|
||||
final Thread[] threads = new Thread[NUM_THREADS];
|
||||
for (int i = 0; i < NUM_THREADS; i++) {
|
||||
threads[i] = new Thread(() -> do_work(once), "MemoryTask-" + i);
|
||||
threads[i].start();
|
||||
}
|
||||
|
||||
for (Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
24
test/test/nativemem/CallsMallocCalloc.java
Normal file
24
test/test/nativemem/CallsMallocCalloc.java
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.nativemem;
|
||||
|
||||
public class CallsMallocCalloc {
|
||||
|
||||
private static final int MALLOC_SIZE = 1999993; // Prime size, useful in assertions.
|
||||
private static final int CALLOC_SIZE = 2000147;
|
||||
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
final boolean once = args.length > 0 && args[0].equals("once");
|
||||
|
||||
do {
|
||||
Native.malloc(MALLOC_SIZE);
|
||||
Native.calloc(1, CALLOC_SIZE);
|
||||
|
||||
// allocate every 1 second.
|
||||
Thread.sleep(1000);
|
||||
} while (!once);
|
||||
}
|
||||
}
|
||||
24
test/test/nativemem/CallsRealloc.java
Normal file
24
test/test/nativemem/CallsRealloc.java
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.nativemem;
|
||||
|
||||
public class CallsRealloc {
|
||||
|
||||
private static final int MALLOC_SIZE = 1999993; // Prime size, useful in assertions.
|
||||
private static final int REALLOC_SIZE = 30000170;
|
||||
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
final boolean once = args.length > 0 && args[0].equals("once");
|
||||
|
||||
do {
|
||||
long addr = Native.malloc(MALLOC_SIZE);
|
||||
long reallocd = Native.realloc(addr, REALLOC_SIZE);
|
||||
|
||||
// allocate every 1 second.
|
||||
Thread.sleep(1000);
|
||||
} while (!once);
|
||||
}
|
||||
}
|
||||
20
test/test/nativemem/Native.java
Normal file
20
test/test/nativemem/Native.java
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.nativemem;
|
||||
|
||||
/**
 * Java bindings for the allocator entry points of the "jnimalloc" test
 * library. Addresses are passed and returned as {@code long} values.
 */
public class Native {

    // The native implementations live in libjnimalloc.
    static {
        System.loadLibrary("jnimalloc");
    }

    /** Allocates {@code size} bytes and returns the address. */
    public static native long malloc(int size);

    /** Allocates {@code num} elements of {@code size} bytes and returns the address. */
    public static native long calloc(long num, int size);

    /** Resizes the block at {@code addr} to {@code size} bytes and returns the resulting address. */
    public static native long realloc(long addr, int size);

    /** Releases the block at {@code addr}. */
    public static native long free(long addr);
}
|
||||
125
test/test/nativemem/NativememTests.java
Normal file
125
test/test/nativemem/NativememTests.java
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright The async-profiler authors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
package test.nativemem;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import one.jfr.JfrReader;
|
||||
import one.jfr.StackTrace;
|
||||
import one.jfr.event.MallocEvent;
|
||||
|
||||
import one.profiler.test.Assert;
|
||||
import one.profiler.test.Os;
|
||||
import one.profiler.test.Output;
|
||||
import one.profiler.test.Test;
|
||||
import one.profiler.test.TestProcess;
|
||||
|
||||
public class NativememTests {
|
||||
|
||||
private static final int MALLOC_SIZE = 1999993;
|
||||
private static final int CALLOC_SIZE = 2000147;
|
||||
private static final int REALLOC_SIZE = 30000170;
|
||||
|
||||
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX, agentArgs = "start,nativemem,total,collapsed,file=%f", args = "once")
|
||||
public void canAgentTraceMallocCalloc(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), MALLOC_SIZE);
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_calloc"), CALLOC_SIZE);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX, agentArgs = "start,nativemem=10000000,total,collapsed,file=%f", args = "once")
|
||||
public void canAgentFilterMallocCalloc(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), 0);
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_calloc"), 0);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX)
|
||||
public void canAsprofTraceMallocCalloc(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
|
||||
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
|
||||
long samplesCalloc = out.samples("Java_test_nativemem_Native_calloc");
|
||||
|
||||
Assert.isGreater(samplesMalloc, 0);
|
||||
Assert.isGreater(samplesCalloc, 0);
|
||||
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
|
||||
Assert.isEqual(samplesCalloc % CALLOC_SIZE, 0);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsRealloc.class, agentArgs = "start,nativemem,total,collapsed,file=%f", args = "once", os = Os.LINUX)
|
||||
public void canAgentTraceRealloc(TestProcess p) throws Exception {
|
||||
Output out = p.waitForExit("%f");
|
||||
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), MALLOC_SIZE);
|
||||
Assert.isEqual(out.samples("Java_test_nativemem_Native_realloc"), REALLOC_SIZE);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsRealloc.class, os = Os.LINUX)
|
||||
public void canAsprofTraceRealloc(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
|
||||
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
|
||||
long samplesRealloc = out.samples("Java_test_nativemem_Native_realloc");
|
||||
|
||||
Assert.isGreater(samplesMalloc, 0);
|
||||
Assert.isGreater(samplesRealloc, 0);
|
||||
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
|
||||
Assert.isEqual(samplesRealloc % REALLOC_SIZE, 0);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX)
|
||||
public void canAsprofTraceAllNoLeak(TestProcess p) throws Exception {
|
||||
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
|
||||
|
||||
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
|
||||
long samplesCalloc = out.samples("Java_test_nativemem_Native_calloc");
|
||||
long samplesRealloc = out.samples("Java_test_nativemem_Native_realloc");
|
||||
|
||||
Assert.isGreater(samplesMalloc, 0);
|
||||
Assert.isGreater(samplesCalloc, 0);
|
||||
Assert.isGreater(samplesRealloc, 0);
|
||||
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
|
||||
Assert.isEqual(samplesCalloc % CALLOC_SIZE, 0);
|
||||
Assert.isEqual(samplesRealloc % REALLOC_SIZE, 0);
|
||||
}
|
||||
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem,file=%f.jfr")
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem,total,file=%f.jfr")
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem=1,total,file=%f.jfr")
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem=10M,total,file=%f.jfr")
|
||||
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,cpu,alloc,nativemem,total,file=%f.jfr")
|
||||
public void livenessJfrHasStacks(TestProcess p) throws Exception {
|
||||
p.waitForExit();
|
||||
String filename = p.getFile("%f").toPath().toString();
|
||||
|
||||
try (JfrReader r = new JfrReader(filename)) {
|
||||
List<MallocEvent> events = r.readAllEvents(MallocEvent.class);
|
||||
assert !events.isEmpty() : "No MallocEvent events found in the JFR output";
|
||||
|
||||
long totalAllocated = 0;
|
||||
Map<Long, MallocEvent> addresses = new HashMap<>();
|
||||
for (MallocEvent event : events) {
|
||||
// only interested in specific sizes.
|
||||
if (event.size != 0 && event.size != MALLOC_SIZE && event.size != CALLOC_SIZE
|
||||
&& event.size != REALLOC_SIZE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
totalAllocated += event.size;
|
||||
if (event.size > 0) {
|
||||
addresses.put(event.address, event);
|
||||
} else {
|
||||
addresses.remove(event.address);
|
||||
}
|
||||
}
|
||||
|
||||
Assert.isGreater(totalAllocated, 0);
|
||||
Assert.isEqual(addresses.size(), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user