Native memory profiler (#1064)

This commit is contained in:
Kerem Kat
2024-12-16 14:58:59 +01:00
committed by GitHub
parent 5ebc82dd04
commit 78f78cf681
40 changed files with 1014 additions and 117 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@
.vscode .vscode
*.iml *.iml
/src/api/**/*.class /src/api/**/*.class
.gdb_history

View File

@@ -56,7 +56,7 @@ CPP_TEST_HEADER := test/native/testRunner.hpp
CPP_TEST_INCLUDES := -Isrc -Itest/native CPP_TEST_INCLUDES := -Isrc -Itest/native
ifeq ($(JAVA_HOME),) ifeq ($(JAVA_HOME),)
export JAVA_HOME:=$(shell java -cp . JavaHome) JAVA_HOME:=$(shell java -cp . JavaHome)
endif endif
OS:=$(shell uname -s) OS:=$(shell uname -s)
@@ -197,6 +197,7 @@ build-test: build-test-cpp build-test-java
build-test-libs: build-test-libs:
@mkdir -p $(TEST_LIB_DIR) @mkdir -p $(TEST_LIB_DIR)
$(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c $(CC) -shared -fPIC -o $(TEST_LIB_DIR)/libreladyn.$(SOEXT) test/native/libs/reladyn.c
$(CC) -shared -fPIC $(INCLUDES) -Isrc -o $(TEST_LIB_DIR)/libjnimalloc.$(SOEXT) test/native/libs/jnimalloc.c
test-cpp: build-test-cpp test-cpp: build-test-cpp
echo "Running cpp tests..." echo "Running cpp tests..."
@@ -204,7 +205,8 @@ test-cpp: build-test-cpp
test-java: build-test-java test-java: build-test-java
echo "Running tests against $(LIB_PROFILER)" echo "Running tests against $(LIB_PROFILER)"
$(JAVA) $(TEST_FLAGS) -ea -cp "build/test.jar:build/jar/*:build/lib/*" one.profiler.test.Runner $(TESTS)
$(JAVA) "-Djava.library.path=$(TEST_LIB_DIR)" $(TEST_FLAGS) -ea -cp "build/test.jar:build/jar/*:build/lib/*" one.profiler.test.Runner $(TESTS)
coverage: override FAT_BINARY=false coverage: override FAT_BINARY=false
coverage: clean-coverage coverage: clean-coverage

View File

@@ -13,7 +13,8 @@ What can be profiled:
- CPU time - CPU time
- Allocations in Java Heap - Allocations in Java Heap
- Contented locks - Native memory allocations and leaks
- Contended locks
- Hardware and software performance counters like cache misses, page faults, context switches - Hardware and software performance counters like cache misses, page faults, context switches
- and [more](docs/ProfilingModes.md). - and [more](docs/ProfilingModes.md).
@@ -27,8 +28,13 @@ Current release (3.0):
- Linux x64: [async-profiler-3.0-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-x64.tar.gz) - Linux x64: [async-profiler-3.0-linux-x64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-x64.tar.gz)
- Linux arm64: [async-profiler-3.0-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-arm64.tar.gz) - Linux arm64: [async-profiler-3.0-linux-arm64.tar.gz](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-arm64.tar.gz)
- macOS x64/arm64: [async-profiler-3.0-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-macos.zip) - macOS x64/arm64: [async-profiler-3.0-macos.zip](https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-macos.zip)
- Converters between profile formats: [converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v3.0/converter.jar) - Converters between profile formats: [converter.jar](https://github.com/async-profiler/async-profiler/releases/download/v3.0/converter.jar)
(JFR to Flame Graph, JFR to pprof, collapsed stacks to Flame Graph)
| From | html | collapsed | pprof | pb.gz |
| --------- | ---- | --------- | ----- | ----- |
| collapsed | ✅ | ✅ | ❌ | ❌ |
| html | ✅ | ✅ | ❌ | ❌ |
| jfr | ✅ | ✅ | ✅ | ✅ |
[Previous releases](https://github.com/async-profiler/async-profiler/releases) [Previous releases](https://github.com/async-profiler/async-profiler/releases)

View File

@@ -1,4 +1,4 @@
# Converter usage & demo # Converter Usage
async-profiler provides a converter utility to convert the profile output to other popular formats. async-profiler async-profiler provides a converter utility to convert the profile output to other popular formats. async-profiler
provides `jfrconv` as part of the compressed package which is found in the same location as the `asprof` binary. A provides `jfrconv` as part of the compressed package which is found in the same location as the `asprof` binary. A
@@ -6,18 +6,20 @@ standalone converter binary is also available [here](ttps://github.com/async-pro
## Supported conversions ## Supported conversions
- collapsed -> html, collapsed | From | html | collapsed | pprof | pb.gz |
- html -> html, collapsed | --------- | ---- | --------- | ----- | ----- |
- jfr -> html, collapsed, pprof, pb.gz | collapsed | ✅ | ✅ | ❌ | ❌ |
| html | ✅ | ✅ | ❌ | ❌ |
| jfr | ✅ | ✅ | ✅ | ✅ |
## Usage ## Usage
`jfrconv [options] <input> [<input>...] <output>` ```
jfrconv [options] <input> [<input>...] <output>
```
The output format specified can be only one at a time for conversion from one format to another. The output format specified can be only one at a time for conversion from one format to another.
### Available arguments
``` ```
Conversion options: Conversion options:
-o --output FORMAT, -o can be omitted if the output file extension unambiguously determines the format, e.g. profile.collapsed -o --output FORMAT, -o can be omitted if the output file extension unambiguously determines the format, e.g. profile.collapsed
@@ -42,6 +44,8 @@ JFR options:
--wall Generate only Wall clock profile during conversion --wall Generate only Wall clock profile during conversion
--alloc Generate only Allocation profile during conversion --alloc Generate only Allocation profile during conversion
--live Build allocation profile from live objects only during conversion --live Build allocation profile from live objects only during conversion
--nativemem Generate native memory allocation profile
--leak Only include memory leaks in nativemem
--lock Generate only Lock contention profile during conversion --lock Generate only Lock contention profile during conversion
-t --threads Split stack traces by threads -t --threads Split stack traces by threads
-s --state LIST Filter thread states: runnable, sleeping, default. State name is case insensitive -s --state LIST Filter thread states: runnable, sleeping, default. State name is case insensitive
@@ -74,13 +78,13 @@ Flame Graph options:
--highlight REGEX Highlight frames matching the given pattern --highlight REGEX Highlight frames matching the given pattern
``` ```
### Example usages with `jfrconv` ## `jfrconv` Examples
This section explains how the binary `jfrconv` can be used which exists in the same bin folder as `jfrconv` is built into the same location as the `asprof` binary.
`asprof`binary.
The below command will generate a foo.html. If no output file is specified, it defaults to a ### Generate flamegraph from jfr
Flame Graph output.
If no output file is specified, it defaults to a Flame Graph output.
``` ```
jfrconv foo.jfr jfrconv foo.jfr
@@ -92,44 +96,34 @@ Flame Graph will have an aggregation of both in the view. Such a view wouldn't m
hence it is advisable to use JFR conversion filter options like `--cpu` to filter out events hence it is advisable to use JFR conversion filter options like `--cpu` to filter out events
during a conversion. during a conversion.
```
jfrconv --cpu foo.jfr -o foo.html
```
or
``` ```
jfrconv --cpu foo.jfr jfrconv --cpu foo.jfr
# which is equivalent to:
# jfrconv --cpu -o flamegraph foo.jfr foo.html
``` ```
for HTML output as HTML is the default format for conversion from JFR. for HTML output as HTML is the default format for conversion from JFR.
In case the conversion output is a Flame Graph, it can be further formatted with the use of flags #### Flame Graph options
specified above under `Flame Graph options`. The below command(s) will add a title string named `Title`
to the Flame Graph instead of the default `Flame Graph` title and also will reverse the graph view To add a custom title to the generated Flame Graph, use `--title`, which has the default value `Flame Graph`:
by reversing the stack traces.
``` ```
jfrconv --cpu foo.jfr foo.html -r --title Title jfrconv --cpu foo.jfr foo.html -r --title "Custom Title"
``` ```
or ### Other formats
``` `jfrconv` supports converting a JFR file to `collapsed`, `pprof` and `pb.gz` formats as well.
jfrconv --cpu foo.jfr --reverse --title Title
```
These are few common use cases. Similarly, a JFR output can be converted to `collapsed`, `pprof` and ## Standalone converter examples
`pb.gz` formats based on specific needs.
### Example usages with standalone converter Standalone converter jar is provided in
[Download](https://github.com/async-profiler/async-profiler/?tab=readme-ov-file#Download). It accepts the same parameters as `jfrconv`.
The usage with standalone converter jar provided in
[Download](https://github.com/async-profiler/async-profiler/?tab=readme-ov-file#Download)
section is very similar to `jfrconv`.
Below is an example usage: Below is an example usage:
`java -cp /path/to/standalone-converter-jar --cpu foo.jfr --reverse --title Application CPU profile` ```
java -cp /path/to/standalone-converter-jar --cpu foo.jfr --reverse --title "Application CPU profile"
The only difference lies in how the binary is used. ```

View File

@@ -28,12 +28,13 @@ The below options are `action`s for async-profiler and common for both `asprof`
| asprof | Launch as agent | Description | | asprof | Launch as agent | Description |
| ------------------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). | | `-o fmt` | `fmt` | Specifies what information to dump when profiling ends. For various dump option details, please refer to [Dump Option Appendix](#dump-option). |
| `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br>Example: `asprof -d 30 <pid>`` | | `-d N` | N/A | asprof-only option designed for interactive use. It is a shortcut for running 3 actions: start, sleep for N seconds, stop. If no `start`, `resume`, `stop` or `status` option is given, the profiler will run for the specified period of time and then automatically stop.<br/>Example: `asprof -d 30 <pid>` |
| `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` | | `--timeout N` | `timeout=N` | The profiling duration, in seconds. The profiler will run for the specified period of time and then automatically stop.<br/>Example: `java -agentpath:/path/to/libasyncProfiler.so=start,event=cpu,timeout=30,file=profile.html <application>` |
| `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br>Please refer to [Special Event Types](https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilingModes.md#special-event-types-supported-on-linux) for additional information. | | `-e --event EVENT` | `event=EVENT` | The profiling event: `cpu`, `alloc`, `nativemem`, `lock`, `cache-misses` etc. Use `list` to see the complete list of available events.<br/>Please refer to [Special Event Types](https://github.com/async-profiler/async-profiler/blob/master/docs/ProfilingModes.md#special-event-types-supported-on-linux) for additional information. |
| `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events.<br>Example: `asprof -e cpu -i 500us 8983` | | `-i --interval N` | `interval=N` | Interval has different meaning depending on the event. For CPU profiling, it's CPU time. In wall clock mode, it's wall clock time. For Java method profiling or native function profiling, it's number of calls. For PMU profiling, it's number of events.<br/>Example: `asprof -e cpu -i 500us 8983` |
| `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). | | `--alloc N` | `alloc=N` | Allocation profiling interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). |
| `--live` | `live` | Retain allocation samples with live objects only (object that have not been collected by the end of profiling session). Useful for finding Java heap memory leaks. | | `--live` | `live` | Retain allocation samples with live objects only (object that have not been collected by the end of profiling session). Useful for finding Java heap memory leaks. |
| `--nativemem N` | `nativemem=N` | Native memory allocation profiling. N, if specified, is the interval in bytes or in other units, if N is followed by `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). Default N is 0. |
| `--lock DURATION` | `lock=DURATION` | In lock profiling mode, sample contended locks when total lock duration overflows the threshold | | `--lock DURATION` | `lock=DURATION` | In lock profiling mode, sample contended locks when total lock duration overflows the threshold |
| `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983` | | `-j N` | `jstackdepth=N` | Sets the maximum stack depth. The default is 2048.<br>Example: `asprof -j 30 8983` |
| `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` | | `-I PATTERN` | `include=PATTERN` | Filter stack traces by the given pattern(s). `-I` defines the name pattern that _must_ be present in the stack traces. `-I` can be specified multiple times. A pattern may begin or end with a star `*` that denotes any (possibly empty) sequence of characters.<br>Example: `asprof -I 'Primes.*' -I 'java/*' 8983` |

View File

@@ -89,6 +89,35 @@ $ gdb $JAVA_HOME/lib/server/libjvm.so -ex 'info address UseG1GC'
This command's output will either contain `Symbol "UseG1GC" is at 0xxxxx` This command's output will either contain `Symbol "UseG1GC" is at 0xxxxx`
or `No symbol "UseG1GC" in current context`. or `No symbol "UseG1GC" in current context`.
## Native memory leaks
The profiling mode `nativemem` records `malloc`, `realloc`, `calloc` and `free` calls with the addresses, so that allocations can be matched with frees. This helps to focus the profile report only on unfreed allocations, which are likely to be a source of a memory leak.
Example:
```
asprof start -e nativemem -f app.jfr <YourApp>
# asprof start --nativemem N -f app.jfr <YourApp>
asprof stop <YourApp>
```
Now we need to process the jfr file, to find native memory leaks:
```
# --total for bytes, default counts invocations.
jfrconv --total --nativemem --leak app.jfr app-leak.html
# No leak analysis, include all native allocations:
jfrconv --total --nativemem app.jfr app-malloc.html
```
When `--leak` option is used, the generated flame graph will show allocations without matching `free` calls:
![nativemem flamegraph](../.assets/images/nativemem_flamegraph.png)
The overhead of `nativemem` profiling depends on the number of native allocations, but is usually small enough even for production use. If required, the overhead can be reduced by configuring the profiling interval. E.g. if you add `nativemem=1m` profiler option, allocation samples will be limited to at most one sample per allocated megabyte.
## Wall-clock profiling ## Wall-clock profiling
`-e wall` option tells async-profiler to sample all threads equally every given `-e wall` option tells async-profiler to sample all threads equally every given

View File

@@ -7,6 +7,14 @@
#define _ARCH_H #define _ARCH_H
// Branch hints built on __builtin_expect: likely(x) tells the compiler the
// condition is expected to be true, unlikely(x) that it is expected to be false.
// `!!(x)` normalizes the condition to 0 or 1 before it is compared with the
// expected value. Each macro is defined only if no definition is already in effect.
# ifndef likely
# define likely(x) (__builtin_expect(!!(x), 1))
# endif
# ifndef unlikely
# define unlikely(x) (__builtin_expect(!!(x), 0))
# endif
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned short u16; typedef unsigned short u16;
typedef unsigned int u32; typedef unsigned int u32;

View File

@@ -215,6 +215,8 @@ Error Arguments::parse(const char* args) {
msg = "event must not be empty"; msg = "event must not be empty";
} else if (strcmp(value, EVENT_ALLOC) == 0) { } else if (strcmp(value, EVENT_ALLOC) == 0) {
if (_alloc < 0) _alloc = 0; if (_alloc < 0) _alloc = 0;
} else if (strcmp(value, EVENT_NATIVEMEM) == 0) {
if (_nativemem < 0) _nativemem = 0;
} else if (strcmp(value, EVENT_LOCK) == 0) { } else if (strcmp(value, EVENT_LOCK) == 0) {
if (_lock < 0) _lock = DEFAULT_LOCK_INTERVAL; if (_lock < 0) _lock = DEFAULT_LOCK_INTERVAL;
} else if (_event != NULL) { } else if (_event != NULL) {
@@ -237,6 +239,9 @@ Error Arguments::parse(const char* args) {
CASE("alloc") CASE("alloc")
_alloc = value == NULL ? 0 : parseUnits(value, BYTES); _alloc = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("nativemem")
_nativemem = value == NULL ? 0 : parseUnits(value, BYTES);
CASE("lock") CASE("lock")
_lock = value == NULL ? 0 : parseUnits(value, NANOS); _lock = value == NULL ? 0 : parseUnits(value, NANOS);
@@ -420,7 +425,7 @@ Error Arguments::parse(const char* args) {
return Error(msg); return Error(msg);
} }
if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0) { if (_event == NULL && _alloc < 0 && _lock < 0 && _wall < 0 && _nativemem < 0) {
_event = EVENT_CPU; _event = EVENT_CPU;
} }

View File

@@ -14,12 +14,13 @@ const long DEFAULT_ALLOC_INTERVAL = 524287; // 512 KiB
const long DEFAULT_LOCK_INTERVAL = 10000; // 10 us const long DEFAULT_LOCK_INTERVAL = 10000; // 10 us
const int DEFAULT_JSTACKDEPTH = 2048; const int DEFAULT_JSTACKDEPTH = 2048;
const char* const EVENT_CPU = "cpu"; const char* const EVENT_CPU = "cpu";
const char* const EVENT_ALLOC = "alloc"; const char* const EVENT_ALLOC = "alloc";
const char* const EVENT_LOCK = "lock"; const char* const EVENT_NATIVEMEM = "nativemem";
const char* const EVENT_WALL = "wall"; const char* const EVENT_LOCK = "lock";
const char* const EVENT_CTIMER = "ctimer"; const char* const EVENT_WALL = "wall";
const char* const EVENT_ITIMER = "itimer"; const char* const EVENT_CTIMER = "ctimer";
const char* const EVENT_ITIMER = "itimer";
#define SHORT_ENUM __attribute__((__packed__)) #define SHORT_ENUM __attribute__((__packed__))
@@ -161,6 +162,7 @@ class Arguments {
int _timeout; int _timeout;
long _interval; long _interval;
long _alloc; long _alloc;
long _nativemem;
long _lock; long _lock;
long _wall; long _wall;
int _jstackdepth; int _jstackdepth;
@@ -212,6 +214,7 @@ class Arguments {
_timeout(0), _timeout(0),
_interval(0), _interval(0),
_alloc(-1), _alloc(-1),
_nativemem(-1),
_lock(-1), _lock(-1),
_wall(-1), _wall(-1),
_jstackdepth(DEFAULT_JSTACKDEPTH), _jstackdepth(DEFAULT_JSTACKDEPTH),

View File

@@ -107,20 +107,6 @@ void CodeCache::sort() {
if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end; if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end;
} }
void CodeCache::mark(NamePredicate predicate, char value) {
for (int i = 0; i < _count; i++) {
const char* blob_name = _blobs[i]._name;
if (blob_name != NULL && predicate(blob_name)) {
NativeFunc::mark(blob_name, value);
}
}
if (value == MARK_VM_RUNTIME && _name != NULL) {
// In case a library has no debug symbols
NativeFunc::mark(_name, value);
}
}
CodeBlob* CodeCache::findBlob(const char* name) { CodeBlob* CodeCache::findBlob(const char* name) {
for (int i = 0; i < _count; i++) { for (int i = 0; i < _count; i++) {
const char* blob_name = _blobs[i]._name; const char* blob_name = _blobs[i]._name;
@@ -184,11 +170,26 @@ const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) {
void CodeCache::addImport(void** entry, const char* name) { void CodeCache::addImport(void** entry, const char* name) {
switch (name[0]) { switch (name[0]) {
case 'c':
if (strcmp(name, "calloc") == 0) {
_imports[im_calloc] = entry;
}
break;
case 'd': case 'd':
if (strcmp(name, "dlopen") == 0) { if (strcmp(name, "dlopen") == 0) {
_imports[im_dlopen] = entry; _imports[im_dlopen] = entry;
} }
break; break;
case 'f':
if (strcmp(name, "free") == 0) {
_imports[im_free] = entry;
}
break;
case 'm':
if (strcmp(name, "malloc") == 0) {
_imports[im_malloc] = entry;
}
break;
case 'p': case 'p':
if (strcmp(name, "pthread_create") == 0) { if (strcmp(name, "pthread_create") == 0) {
_imports[im_pthread_create] = entry; _imports[im_pthread_create] = entry;
@@ -200,6 +201,11 @@ void CodeCache::addImport(void** entry, const char* name) {
_imports[im_poll] = entry; _imports[im_poll] = entry;
} }
break; break;
case 'r':
if (strcmp(name, "realloc") == 0) {
_imports[im_realloc] = entry;
}
break;
} }
} }

View File

@@ -12,8 +12,6 @@
#define NO_MIN_ADDRESS ((const void*)-1) #define NO_MIN_ADDRESS ((const void*)-1)
#define NO_MAX_ADDRESS ((const void*)0) #define NO_MAX_ADDRESS ((const void*)0)
typedef bool (*NamePredicate)(const char* name);
const int INITIAL_CODE_CACHE_CAPACITY = 1000; const int INITIAL_CODE_CACHE_CAPACITY = 1000;
const int MAX_NATIVE_LIBS = 2048; const int MAX_NATIVE_LIBS = 2048;
@@ -24,13 +22,18 @@ enum ImportId {
im_pthread_exit, im_pthread_exit,
im_pthread_setspecific, im_pthread_setspecific,
im_poll, im_poll,
im_malloc,
im_calloc,
im_realloc,
im_free,
NUM_IMPORTS NUM_IMPORTS
}; };
enum Mark { enum Mark {
MARK_VM_RUNTIME = 1, MARK_VM_RUNTIME = 1,
MARK_INTERPRETER = 2, MARK_INTERPRETER = 2,
MARK_COMPILER_ENTRY = 3 MARK_COMPILER_ENTRY = 3,
MARK_ASYNC_PROFILER = 4, // async-profiler internals such as native hooks.
}; };
@@ -159,7 +162,21 @@ class CodeCache {
void add(const void* start, int length, const char* name, bool update_bounds = false); void add(const void* start, int length, const char* name, bool update_bounds = false);
void updateBounds(const void* start, const void* end); void updateBounds(const void* start, const void* end);
void sort(); void sort();
void mark(NamePredicate predicate, char value);
// Marks every named blob accepted by `predicate` with `value`.
// `predicate` is any callable taking a `const char*` blob name and returning
// something convertible to bool.
template <typename NamePredicate>
inline void mark(NamePredicate predicate, char value) {
    for (int idx = 0; idx < _count; idx++) {
        const char* candidate = _blobs[idx]._name;
        if (candidate == NULL || !predicate(candidate)) {
            continue;  // unnamed blob, or name rejected by the predicate
        }
        NativeFunc::mark(candidate, value);
    }

    // In case a library has no debug symbols
    if (_name != NULL && value == MARK_VM_RUNTIME) {
        NativeFunc::mark(_name, value);
    }
}
void addImport(void** entry, const char* name); void addImport(void** entry, const char* name);
void** findImport(ImportId id); void** findImport(ImportId id);

View File

@@ -97,6 +97,8 @@ public class Main {
" --wall Wall clock profile\n" + " --wall Wall clock profile\n" +
" --alloc Allocation profile\n" + " --alloc Allocation profile\n" +
" --live Live object profile\n" + " --live Live object profile\n" +
" --nativemem malloc profile\n" +
" --leak Only include memory leaks in nativemem\n" +
" --lock Lock contention profile\n" + " --lock Lock contention profile\n" +
" -t --threads Split stack traces by threads\n" + " -t --threads Split stack traces by threads\n" +
" -s --state LIST Filter thread states: runnable, sleeping\n" + " -s --state LIST Filter thread states: runnable, sleeping\n" +

View File

@@ -25,6 +25,8 @@ public class Arguments {
public boolean cpu; public boolean cpu;
public boolean wall; public boolean wall;
public boolean alloc; public boolean alloc;
public boolean nativemem;
public boolean leak;
public boolean live; public boolean live;
public boolean lock; public boolean lock;
public boolean threads; public boolean threads;

View File

@@ -21,11 +21,17 @@ import static one.convert.Frame.*;
public abstract class JfrConverter extends Classifier { public abstract class JfrConverter extends Classifier {
protected final JfrReader jfr; protected final JfrReader jfr;
protected final Arguments args; protected final Arguments args;
protected final IEventAggregator eventAggregator;
protected Dictionary<String> methodNames; protected Dictionary<String> methodNames;
public JfrConverter(JfrReader jfr, Arguments args) { public JfrConverter(JfrReader jfr, Arguments args) {
boolean leakDetection = args.nativemem && args.leak;
this.jfr = jfr; this.jfr = jfr;
this.args = args; this.args = args;
IEventAggregator agg = new EventAggregator(args.threads, args.total);
this.eventAggregator = leakDetection ? new MallocLeakAggregator(agg) : agg;
} }
public void convert() throws IOException { public void convert() throws IOException {
@@ -33,19 +39,35 @@ public abstract class JfrConverter extends Classifier {
while (jfr.hasMoreChunks()) { while (jfr.hasMoreChunks()) {
// Reset method dictionary, since new chunk may have different IDs // Reset method dictionary, since new chunk may have different IDs
methodNames = new Dictionary<>(); methodNames = new Dictionary<>();
collectEvents();
eventAggregator.setFactor(args.lock ? 1e9 / jfr.ticksPerSec : 1.0);
eventAggregator.finishChunk();
if (args.grain > 0) {
eventAggregator.coarsen(args.grain);
}
convertChunk(); convertChunk();
eventAggregator.resetChunk();
} }
finalizeAggregator();
} }
protected abstract void convertChunk() throws IOException; private void finalizeAggregator()throws IOException {
eventAggregator.finish();
convertChunk();
eventAggregator.resetChunk();
}
protected EventAggregator collectEvents() throws IOException { protected abstract void convertChunk();
EventAggregator agg = new EventAggregator(args.threads, args.total, args.lock ? 1e9 / jfr.ticksPerSec : 1.0);
Class<? extends Event> eventClass = protected void collectEvents() throws IOException {
args.live ? LiveObject.class : Class<? extends Event> eventClass = args.live ? LiveObject.class
args.alloc ? AllocationSample.class : : args.alloc ? AllocationSample.class
args.lock ? ContendedLock.class : ExecutionSample.class; : args.lock ? ContendedLock.class
: args.nativemem ? MallocEvent.class
: ExecutionSample.class;
BitSet threadStates = null; BitSet threadStates = null;
if (args.state != null) { if (args.state != null) {
@@ -62,19 +84,13 @@ public abstract class JfrConverter extends Classifier {
long startTicks = args.from != 0 ? toTicks(args.from) : Long.MIN_VALUE; long startTicks = args.from != 0 ? toTicks(args.from) : Long.MIN_VALUE;
long endTicks = args.to != 0 ? toTicks(args.to) : Long.MAX_VALUE; long endTicks = args.to != 0 ? toTicks(args.to) : Long.MAX_VALUE;
for (Event event; (event = jfr.readEvent(eventClass)) != null; ) { for (Event event; (event = jfr.readEvent(eventClass)) != null;) {
if (event.time >= startTicks && event.time <= endTicks) { if (event.time >= startTicks && event.time <= endTicks) {
if (threadStates == null || threadStates.get(((ExecutionSample) event).threadState)) { if (threadStates == null || threadStates.get(((ExecutionSample) event).threadState)) {
agg.collect(event); eventAggregator.collect(event);
} }
} }
} }
if (args.grain > 0) {
agg.coarsen(args.grain);
}
return agg;
} }
protected int toThreadState(String name) { protected int toThreadState(String name) {
@@ -163,8 +179,8 @@ public abstract class JfrConverter extends Classifier {
protected String getThreadName(int tid) { protected String getThreadName(int tid) {
String threadName = jfr.threads.get(tid); String threadName = jfr.threads.get(tid);
return threadName == null ? "[tid=" + tid + ']' : return threadName == null ? "[tid=" + tid + ']'
threadName.startsWith("[tid=") ? threadName : '[' + threadName + " tid=" + tid + ']'; : threadName.startsWith("[tid=") ? threadName : '[' + threadName + " tid=" + tid + ']';
} }
protected String toJavaClassName(byte[] symbol, int start, boolean dotted) { protected String toJavaClassName(byte[] symbol, int start, boolean dotted) {

View File

@@ -9,7 +9,7 @@ import one.jfr.JfrReader;
import one.jfr.StackTrace; import one.jfr.StackTrace;
import one.jfr.event.AllocationSample; import one.jfr.event.AllocationSample;
import one.jfr.event.Event; import one.jfr.event.Event;
import one.jfr.event.EventAggregator; import one.jfr.event.IEventAggregator;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
@@ -30,8 +30,8 @@ public class JfrToFlame extends JfrConverter {
} }
@Override @Override
protected void convertChunk() throws IOException { protected void convertChunk() {
collectEvents().forEach(new EventAggregator.ValueVisitor() { eventAggregator.forEach(new IEventAggregator.ValueVisitor() {
final CallStack stack = new CallStack(); final CallStack stack = new CallStack();
@Override @Override

View File

@@ -8,7 +8,7 @@ package one.convert;
import one.jfr.JfrReader; import one.jfr.JfrReader;
import one.jfr.StackTrace; import one.jfr.StackTrace;
import one.jfr.event.Event; import one.jfr.event.Event;
import one.jfr.event.EventAggregator; import one.jfr.event.IEventAggregator;
import one.proto.Proto; import one.proto.Proto;
import java.io.FileOutputStream; import java.io.FileOutputStream;
@@ -29,7 +29,9 @@ public class JfrToPprof extends JfrConverter {
super(jfr, args); super(jfr, args);
Proto sampleType; Proto sampleType;
if (args.alloc || args.live) { if (args.nativemem) {
sampleType = valueType("malloc", args.total ? "bytes" : "count");
} else if (args.alloc || args.live) {
sampleType = valueType("allocations", args.total ? "bytes" : "count"); sampleType = valueType("allocations", args.total ? "bytes" : "count");
} else if (args.lock) { } else if (args.lock) {
sampleType = valueType("locks", args.total ? "nanoseconds" : "count"); sampleType = valueType("locks", args.total ? "nanoseconds" : "count");
@@ -42,8 +44,8 @@ public class JfrToPprof extends JfrConverter {
} }
@Override @Override
protected void convertChunk() throws IOException { protected void convertChunk() {
collectEvents().forEach(new EventAggregator.ValueVisitor() { eventAggregator.forEach(new IEventAggregator.ValueVisitor() {
final Proto s = new Proto(100); final Proto s = new Proto(100);
@Override @Override

View File

@@ -73,6 +73,8 @@ public class JfrReader implements Closeable {
private int monitorEnter; private int monitorEnter;
private int threadPark; private int threadPark;
private int activeSetting; private int activeSetting;
private int malloc;
private int free;
public JfrReader(String fileName) throws IOException { public JfrReader(String fileName) throws IOException {
this.ch = FileChannel.open(Paths.get(fileName), StandardOpenOption.READ); this.ch = FileChannel.open(Paths.get(fileName), StandardOpenOption.READ);
@@ -174,6 +176,10 @@ public class JfrReader implements Closeable {
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(true); if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(true);
} else if (type == allocationOutsideTLAB || type == allocationSample) { } else if (type == allocationOutsideTLAB || type == allocationSample) {
if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(false); if (cls == null || cls == AllocationSample.class) return (E) readAllocationSample(false);
} else if (type == malloc) {
if (cls == null || cls == MallocEvent.class) return (E) readMallocEvent(true);
} else if (type == free) {
if (cls == null || cls == MallocEvent.class) return (E) readMallocEvent(false);
} else if (type == liveObject) { } else if (type == liveObject) {
if (cls == null || cls == LiveObject.class) return (E) readLiveObject(); if (cls == null || cls == LiveObject.class) return (E) readLiveObject();
} else if (type == monitorEnter) { } else if (type == monitorEnter) {
@@ -221,6 +227,15 @@ public class JfrReader implements Closeable {
return new AllocationSample(time, tid, stackTraceId, classId, allocationSize, tlabSize); return new AllocationSample(time, tid, stackTraceId, classId, allocationSize, tlabSize);
} }
/**
 * Decodes one native memory event from the current read position.
 *
 * @param hasSize true when the record carries a trailing size field
 *                (the caller passes true for the malloc type and false for the free type)
 * @return the decoded event; size is 0 when no size field is present
 */
private MallocEvent readMallocEvent(boolean hasSize) {
    // Fields must be consumed strictly in the order they appear in the record.
    final long timestamp = getVarlong();
    final int threadId = getVarint();
    final int traceId = getVarint();
    final long addr = getVarlong();

    long bytes = 0;
    if (hasSize) {
        bytes = getVarlong();
    }
    return new MallocEvent(timestamp, threadId, traceId, addr, bytes);
}
private LiveObject readLiveObject() { private LiveObject readLiveObject() {
long time = getVarlong(); long time = getVarlong();
int tid = getVarint(); int tid = getVarint();
@@ -540,6 +555,8 @@ public class JfrReader implements Closeable {
monitorEnter = getTypeId("jdk.JavaMonitorEnter"); monitorEnter = getTypeId("jdk.JavaMonitorEnter");
threadPark = getTypeId("jdk.ThreadPark"); threadPark = getTypeId("jdk.ThreadPark");
activeSetting = getTypeId("jdk.ActiveSetting"); activeSetting = getTypeId("jdk.ActiveSetting");
malloc = getTypeId("profiler.Malloc");
free = getTypeId("profiler.Free");
registerEvent("jdk.CPULoad", CPULoad.class); registerEvent("jdk.CPULoad", CPULoad.class);
registerEvent("jdk.GCHeapSummary", GCHeapSummary.class); registerEvent("jdk.GCHeapSummary", GCHeapSummary.class);

View File

@@ -5,12 +5,12 @@
package one.jfr.event; package one.jfr.event;
public class EventAggregator { public class EventAggregator implements IEventAggregator {
private static final int INITIAL_CAPACITY = 1024; private static final int INITIAL_CAPACITY = 1024;
private final boolean threads; private final boolean threads;
private final boolean total; private final boolean total;
private final double factor; private double factor;
private Event[] keys; private Event[] keys;
private long[] samples; private long[] samples;
private long[] values; private long[] values;
@@ -18,13 +18,19 @@ public class EventAggregator {
private double fraction; private double fraction;
public EventAggregator(boolean threads, boolean total) { public EventAggregator(boolean threads, boolean total) {
this(threads, total, 1.0);
}
public EventAggregator(boolean threads, boolean total, double factor) {
this.threads = threads; this.threads = threads;
this.total = total; this.total = total;
this.resetChunk();
}
public void setFactor(double factor) {
this.factor = factor; this.factor = factor;
}
public void resetChunk() {
this.size = 0;
this.factor = 1;
this.keys = new Event[INITIAL_CAPACITY]; this.keys = new Event[INITIAL_CAPACITY];
this.samples = new long[INITIAL_CAPACITY]; this.samples = new long[INITIAL_CAPACITY];
this.values = new long[INITIAL_CAPACITY]; this.values = new long[INITIAL_CAPACITY];
@@ -59,7 +65,15 @@ public class EventAggregator {
} }
} }
public void forEach(Visitor visitor) { public void finish() {
// EventAggregator does not need finishing.
}
public void finishChunk() {
// EventAggregator does not need finishing.
}
public void forEach(IEventAggregator.Visitor visitor) {
for (int i = 0; i < keys.length; i++) { for (int i = 0; i < keys.length; i++) {
if (keys[i] != null) { if (keys[i] != null) {
visitor.visit(keys[i], samples[i], values[i]); visitor.visit(keys[i], samples[i], values[i]);
@@ -67,7 +81,7 @@ public class EventAggregator {
} }
} }
public void forEach(ValueVisitor visitor) { public void forEach(IEventAggregator.ValueVisitor visitor) {
double factor = total ? this.factor : 0.0; double factor = total ? this.factor : 0.0;
for (int i = 0; i < keys.length; i++) { for (int i = 0; i < keys.length; i++) {
if (keys[i] != null) { if (keys[i] != null) {
@@ -77,6 +91,8 @@ public class EventAggregator {
} }
public void coarsen(double grain) { public void coarsen(double grain) {
this.fraction = 0;
for (int i = 0; i < keys.length; i++) { for (int i = 0; i < keys.length; i++) {
if (keys[i] != null) { if (keys[i] != null) {
long s0 = samples[i]; long s0 = samples[i];
@@ -131,12 +147,4 @@ public class EventAggregator {
samples = newSamples; samples = newSamples;
values = newValues; values = newValues;
} }
public interface Visitor {
void visit(Event event, long samples, long value);
}
public interface ValueVisitor {
void visit(Event event, long value);
}
} }

View File

@@ -0,0 +1,32 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
/**
 * Common contract for objects that accumulate JFR events and later expose
 * the accumulated results to a visitor.
 */
public interface IEventAggregator {

    /** Callback receiving an event together with its sample count and value. */
    interface Visitor {
        void visit(Event event, long samples, long value);
    }

    /** Callback receiving an event together with a single value. */
    interface ValueVisitor {
        void visit(Event event, long value);
    }

    /** Feeds one event into the aggregator. */
    void collect(Event e);

    /** Chunk-level lifecycle hook invoked before a chunk's events are collected. */
    void resetChunk();

    /** Chunk-level lifecycle hook invoked after a chunk's events were collected. */
    void finishChunk();

    /** Lifecycle hook invoked once all events have been collected. */
    void finish();

    /** Sets the multiplier applied to reported values. */
    void setFactor(double factor);

    /** Requests coarsening of the aggregated values with the given grain. */
    void coarsen(double grain);

    void forEach(Visitor visitor);

    void forEach(ValueVisitor visitor);
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
/**
 * A native memory event: an allocation at {@link #address} of {@link #size} bytes.
 * JfrReader creates free events through the same class with a size of 0.
 */
public class MallocEvent extends Event {
    /** Address of the memory block the event refers to. */
    public final long address;
    /** Allocation size in bytes; 0 when the event carries no size. */
    public final long size;

    public MallocEvent(long time, int tid, int stackTraceId, long address, long size) {
        super(time, tid, stackTraceId);
        this.size = size;
        this.address = address;
    }

    /** The value of a malloc event is its size in bytes. */
    @Override
    public long value() {
        return this.size;
    }
}

View File

@@ -0,0 +1,85 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package one.jfr.event;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
public class MallocLeakAggregator implements IEventAggregator {
private final IEventAggregator wrapped;
private List<Event> events;
private double grain = 0;
private double factor = 1;
Map<Long, MallocEvent> addresses = new HashMap<>();
public MallocLeakAggregator(IEventAggregator wrapped) {
this.wrapped = wrapped;
this.events = new ArrayList<Event>();
}
private List<Event> filter(List<Event> events) {
for (Event event : events) {
if (!(event instanceof MallocEvent)) {
continue;
}
MallocEvent e = (MallocEvent) event;
if (e.size > 0) {
addresses.put(e.address, e);
} else {
addresses.remove(e.address);
}
}
return new ArrayList<>(addresses.values());
}
public void collect(Event e) {
events.add(e);
}
public void finishChunk() {
Collections.sort(events);
events = filter(events);
}
public void finish() {
wrapped.setFactor(this.factor);
if (grain > 0) {
wrapped.coarsen(grain);
}
for (Event e : events) {
wrapped.collect(e);
}
}
public void coarsen(double grain) {
// Delay coarsening until the final chunk is processed.
this.grain = grain;
}
public void setFactor(double factor) {
this.factor = factor;
}
public void resetChunk() {
wrapped.resetChunk();
}
public void forEach(IEventAggregator.Visitor visitor) {
wrapped.forEach(visitor);
}
public void forEach(IEventAggregator.ValueVisitor visitor) {
wrapped.forEach(visitor);
}
}

View File

@@ -16,6 +16,7 @@ enum EventType {
EXECUTION_SAMPLE, EXECUTION_SAMPLE,
WALL_CLOCK_SAMPLE, WALL_CLOCK_SAMPLE,
INSTRUMENTED_METHOD, INSTRUMENTED_METHOD,
MALLOC_SAMPLE,
ALLOC_SAMPLE, ALLOC_SAMPLE,
ALLOC_OUTSIDE_TLAB, ALLOC_OUTSIDE_TLAB,
LIVE_OBJECT, LIVE_OBJECT,
@@ -75,4 +76,11 @@ class ProfilingWindow : public Event {
u64 _end_time; u64 _end_time;
}; };
// Payload of a MALLOC_SAMPLE event; used for both allocations and frees.
class MallocEvent : public Event {
public:
// Timestamp in TSC ticks (MallocTracer fills it from TSC::ticks()).
u64 _start_time;
// Address of the allocated or freed block.
uintptr_t _address;
// Allocation size in bytes; 0 marks a free event.
u64 _size;
};
#endif // _EVENT_H #endif // _EVENT_H

View File

@@ -1224,6 +1224,19 @@ class Recording {
buf->put8(start, buf->offset() - start); buf->put8(start, buf->offset() - start);
} }
// Serializes a native memory event. A zero size denotes a free, which is
// written as T_FREE without the trailing size field; otherwise T_MALLOC.
void recordMallocSample(Buffer* buf, int tid, u32 call_trace_id, MallocEvent* event) {
    const bool is_malloc = event->_size != 0;

    // Reserve one byte for the record length; it is filled in at the end.
    int start = buf->skip(1);
    buf->put8(is_malloc ? T_MALLOC : T_FREE);
    buf->putVar64(event->_start_time);
    buf->putVar32(tid);
    buf->putVar32(call_trace_id);
    buf->putVar64(event->_address);
    if (is_malloc) {
        buf->putVar64(event->_size);
    }
    buf->put8(start, buf->offset() - start);
}
void recordLiveObject(Buffer* buf, int tid, u32 call_trace_id, LiveObject* event) { void recordLiveObject(Buffer* buf, int tid, u32 call_trace_id, LiveObject* event) {
int start = buf->skip(1); int start = buf->skip(1);
buf->put8(T_LIVE_OBJECT); buf->put8(T_LIVE_OBJECT);
@@ -1477,6 +1490,9 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
case WALL_CLOCK_SAMPLE: case WALL_CLOCK_SAMPLE:
_rec->recordWallClockSample(buf, tid, call_trace_id, (WallClockEvent*)event); _rec->recordWallClockSample(buf, tid, call_trace_id, (WallClockEvent*)event);
break; break;
case MALLOC_SAMPLE:
_rec->recordMallocSample(buf, tid, call_trace_id, (MallocEvent*)event);
break;
case ALLOC_SAMPLE: case ALLOC_SAMPLE:
_rec->recordAllocationInNewTLAB(buf, tid, call_trace_id, (AllocEvent*)event); _rec->recordAllocationInNewTLAB(buf, tid, call_trace_id, (AllocEvent*)event);
break; break;

View File

@@ -229,6 +229,21 @@ JfrMetadata::JfrMetadata() : Element("root") {
<< field("state", T_THREAD_STATE, "Thread State", F_CPOOL) << field("state", T_THREAD_STATE, "Thread State", F_CPOOL)
<< field("samples", T_INT, "Samples", F_UNSIGNED)) << field("samples", T_INT, "Samples", F_UNSIGNED))
<< (type("profiler.Malloc", T_MALLOC, "malloc")
<< category("Java Virtual Machine", "Native Memory")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("address", T_LONG, "Address", F_ADDRESS)
<< field("size", T_LONG, "Size", F_BYTES))
<< (type("profiler.Free", T_FREE, "free")
<< category("Java Virtual Machine", "Native Memory")
<< field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
<< field("eventThread", T_THREAD, "Event Thread", F_CPOOL)
<< field("stackTrace", T_STACK_TRACE, "Stack Trace", F_CPOOL)
<< field("address", T_LONG, "Address", F_ADDRESS))
<< (type("jdk.jfr.Label", T_LABEL, NULL) << (type("jdk.jfr.Label", T_LABEL, NULL)
<< field("value", T_STRING)) << field("value", T_STRING))

View File

@@ -60,6 +60,8 @@ enum JfrType {
T_WINDOW = 116, T_WINDOW = 116,
T_LIVE_OBJECT = 117, T_LIVE_OBJECT = 117,
T_WALL_CLOCK_SAMPLE = 118, T_WALL_CLOCK_SAMPLE = 118,
T_MALLOC = 119,
T_FREE = 120,
T_ANNOTATION = 200, T_ANNOTATION = 200,
T_LABEL = 201, T_LABEL = 201,

View File

@@ -44,7 +44,7 @@ static const char USAGE_STRING[] =
" collect collect profile for the specified period of time\n" " collect collect profile for the specified period of time\n"
" and then stop (default action)\n" " and then stop (default action)\n"
"Options:\n" "Options:\n"
" -e event profiling event: cpu|alloc|lock|cache-misses etc.\n" " -e event profiling event: cpu|alloc|nativemem|lock|cache-misses etc.\n"
" -d duration run profiling for <duration> seconds\n" " -d duration run profiling for <duration> seconds\n"
" -f filename dump output to <filename>\n" " -f filename dump output to <filename>\n"
" -i interval sampling interval in nanoseconds\n" " -i interval sampling interval in nanoseconds\n"
@@ -69,6 +69,7 @@ static const char USAGE_STRING[] =
" --loop time run profiler in a loop\n" " --loop time run profiler in a loop\n"
" --alloc bytes allocation profiling interval in bytes\n" " --alloc bytes allocation profiling interval in bytes\n"
" --live build allocation profile from live objects only\n" " --live build allocation profile from live objects only\n"
" --nativemem bytes native allocation profiling interval in bytes\n"
" --lock duration lock profiling threshold in nanoseconds\n" " --lock duration lock profiling threshold in nanoseconds\n"
" --wall interval wall clock profiling interval\n" " --wall interval wall clock profiling interval\n"
" --total accumulate the total value (time, bytes, etc.)\n" " --total accumulate the total value (time, bytes, etc.)\n"
@@ -491,7 +492,7 @@ int main(int argc, const char** argv) {
} else if (arg == "--reverse" || arg == "--samples" || arg == "--total" || arg == "--sched" || arg == "--live") { } else if (arg == "--reverse" || arg == "--samples" || arg == "--total" || arg == "--sched" || arg == "--live") {
format << "," << (arg.str() + 2); format << "," << (arg.str() + 2);
} else if (arg == "--alloc" || arg == "--lock" || arg == "--wall" || } else if (arg == "--alloc" || arg == "--nativemem" || arg == "--lock" || arg == "--wall" ||
arg == "--chunksize" || arg == "--chunktime" || arg == "--chunksize" || arg == "--chunktime" ||
arg == "--cstack" || arg == "--signal" || arg == "--clock" || arg == "--begin" || arg == "--end") { arg == "--cstack" || arg == "--signal" || arg == "--clock" || arg == "--begin" || arg == "--end") {
params << "," << (arg.str() + 2) << "=" << args.next(); params << "," << (arg.str() + 2) << "=" << args.next();

233
src/mallocTracer.cpp Normal file
View File

@@ -0,0 +1,233 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include "asprof.h"
#include "assert.h"
#include "codeCache.h"
#include "mallocTracer.h"
#include "os.h"
#include "profiler.h"
#include "tsc.h"
#include <dlfcn.h>
#include <string.h>
// Resolves the next definition of `sym` in library search order via
// dlsym(RTLD_NEXT); falls back to the symbol visible at link time
// when the lookup fails. Requires a matching `sym##_t` function pointer typedef.
#define ADDRESS_OF(sym) ({ \
void* addr = dlsym(RTLD_NEXT, #sym); \
addr != NULL ? (sym##_t)addr : sym; \
})
// Pointers to the original allocator functions; filled in by getOrigAddresses().
typedef void* (*malloc_t)(size_t);
static malloc_t _orig_malloc = NULL;
typedef void* (*calloc_t)(size_t, size_t);
static calloc_t _orig_calloc = NULL;
typedef void* (*realloc_t)(void*, size_t);
static realloc_t _orig_realloc = NULL;
typedef void (*free_t)(void*);
static free_t _orig_free = NULL;
// Replacement for malloc: forwards to the original allocator and reports
// successful, non-empty allocations to MallocTracer.
static void* malloc_hook(size_t size) {
    void* ptr = _orig_malloc(size);
    const bool tracked = ptr != NULL && size != 0;
    if (likely(tracked)) {
        MallocTracer::recordMalloc(ptr, size);
    }
    return ptr;
}
#ifdef __linux__
// Exported weak malloc: traces through malloc_hook once MallocTracer is
// initialized; before that it forwards to the original malloc, or returns
// NULL if the original address has not been resolved yet.
extern "C" WEAK DLLEXPORT void* malloc(size_t size) {
    if (unlikely(!MallocTracer::initialized())) {
        return likely(_orig_malloc != NULL) ? _orig_malloc(size) : NULL;
    }
    return malloc_hook(size);
}
#endif
// Replacement for calloc: forwards to the original allocator and reports
// successful, non-empty allocations (num * size bytes) to MallocTracer.
static void* calloc_hook(size_t num, size_t size) {
    void* ptr = _orig_calloc(num, size);
    const bool tracked = ptr != NULL && num != 0 && size != 0;
    if (likely(tracked)) {
        MallocTracer::recordMalloc(ptr, num * size);
    }
    return ptr;
}
#ifdef __linux__
// Exported weak calloc: traces through calloc_hook once MallocTracer is
// initialized; before that it forwards to the original calloc, or returns
// NULL if the original address has not been resolved yet.
extern "C" WEAK DLLEXPORT void* calloc(size_t num, size_t size) {
    if (unlikely(!MallocTracer::initialized())) {
        return likely(_orig_calloc != NULL) ? _orig_calloc(num, size) : NULL;
    }
    return calloc_hook(num, size);
}
#endif
static void* realloc_hook(void* addr, size_t size) {
void* ret = _orig_realloc(addr, size);
if (likely(ret && addr)) {
MallocTracer::recordFree(addr);
}
if (likely(ret && size)) {
MallocTracer::recordMalloc(ret, size);
}
return ret;
}
#ifdef __linux__
// Exported weak realloc: traces through realloc_hook once MallocTracer is
// initialized; before that it forwards to the original realloc, or returns
// NULL if the original address has not been resolved yet.
extern "C" WEAK DLLEXPORT void* realloc(void* addr, size_t size) {
    if (unlikely(!MallocTracer::initialized())) {
        return likely(_orig_realloc != NULL) ? _orig_realloc(addr, size) : NULL;
    }
    return realloc_hook(addr, size);
}
#endif
// Replacement for free: releases the block through the original free first,
// then reports the (non-NULL) address to MallocTracer.
static void free_hook(void* addr) {
    _orig_free(addr);
    if (addr == NULL) {
        return;
    }
    MallocTracer::recordFree(addr);
}
#ifdef __linux__
// Exported weak free: traces through free_hook once MallocTracer is
// initialized; before that it forwards to the original free, or does
// nothing if the original address has not been resolved yet.
extern "C" WEAK DLLEXPORT void free(void* addr) {
    if (unlikely(!MallocTracer::initialized())) {
        if (likely(_orig_free != NULL)) {
            _orig_free(addr);
        }
        return;
    }
    free_hook(addr);
}
#endif
// Sampling interval in bytes, set from args._nativemem in start().
u64 MallocTracer::_interval;
// Running byte counter passed to updateCounter() in recordMalloc().
volatile u64 MallocTracer::_allocated_bytes;
// Serializes patchLibs().
Mutex MallocTracer::_patch_lock;
// Number of native libraries whose imports have already been patched.
int MallocTracer::_patched_libs = 0;
// Set once by initialize(); never reset in this file.
bool MallocTracer::_initialized = false;
// Library constructor: resolves the original allocator entry points at load
// time so the exported wrappers and the hooks have something to forward to.
__attribute__((constructor)) static void getOrigAddresses() {
// Store these addresses, regardless of MallocTracer being enabled or not.
_orig_malloc = ADDRESS_OF(malloc);
_orig_calloc = ADDRESS_OF(calloc);
_orig_realloc = ADDRESS_OF(realloc);
_orig_free = ADDRESS_OF(free);
}
// One-time setup. Returns false if initialization has already happened;
// otherwise marks the hook functions in the profiler's own library and
// returns the result of installing the hooks.
bool MallocTracer::initialize() {
    const bool first_call = __sync_bool_compare_and_swap(&_initialized, false, true);
    if (!first_call) {
        return false;
    }

    CodeCache* lib = Profiler::instance()->findLibraryByAddress((void*)MallocTracer::initialize);
    assert(lib);

    // Mark the static *_hook functions (matched by mangled-name prefix).
    lib->mark(
        [](const char* s) -> bool {
            static const char* const HOOK_PREFIXES[] = {
                "_ZL11malloc_hook",
                "_ZL11calloc_hook",
                "_ZL12realloc_hook",
                "_ZL9free_hook",
            };
            for (const char* prefix : HOOK_PREFIXES) {
                if (strncmp(s, prefix, strlen(prefix)) == 0) {
                    return true;
                }
            }
            return false;
        },
        MARK_ASYNC_PROFILER);

    return installHooks();
}
// Points the malloc/calloc/realloc/free imports of native libraries either at
// the hooks (install == true) or back at the original functions.
// Installing is incremental: only libraries not patched yet are visited.
// Uninstalling visits every library and resets the progress counter.
// Returns false when MallocTracer has not been initialized.
bool MallocTracer::patchLibs(bool install) {
    if (!initialized()) {
        return false;
    }

    MutexLocker ml(_patch_lock);

    if (!install) {
        assert(_orig_malloc);
        assert(_orig_calloc);
        assert(_orig_realloc);
        assert(_orig_free);
        // Start from the first library so that all of them get restored.
        _patched_libs = 0;
    }

    void* malloc_target = install ? (void*)malloc_hook : (void*)_orig_malloc;
    void* calloc_target = install ? (void*)calloc_hook : (void*)_orig_calloc;
    void* realloc_target = install ? (void*)realloc_hook : (void*)_orig_realloc;
    void* free_target = install ? (void*)free_hook : (void*)_orig_free;

    CodeCacheArray* native_libs = Profiler::instance()->nativeLibs();
    const int native_lib_count = native_libs->count();

    for (; _patched_libs < native_lib_count; _patched_libs++) {
        CodeCache* cc = (*native_libs)[_patched_libs];
        cc->patchImport(im_malloc, malloc_target);
        cc->patchImport(im_calloc, calloc_target);
        cc->patchImport(im_realloc, realloc_target);
        cc->patchImport(im_free, free_target);
    }

    if (!install) {
        // A later install must patch every library again.
        _patched_libs = 0;
    }

    return true;
}
// Records an allocation sample with a stack trace, but only when
// updateCounter() reports that the byte counter has crossed the interval.
void MallocTracer::recordMalloc(void* address, size_t size) {
    if (!updateCounter(_allocated_bytes, size, _interval)) {
        return;
    }

    MallocEvent event;
    event._start_time = TSC::ticks();
    event._address = (uintptr_t)address;
    event._size = size;

    Profiler::instance()->recordSample(NULL, size, MALLOC_SAMPLE, &event);
}
void MallocTracer::recordFree(void* address) {
MallocEvent event;
event._start_time = TSC::ticks();
event._address = (uintptr_t)address;
event._size = 0;
Profiler::instance()->recordEventOnly(MALLOC_SAMPLE, &event);
}
// Verifies that native memory profiling is available on this platform.
Error MallocTracer::check(Arguments& args) {
    if (OS::isLinux()) {
        return Error::OK;
    }
    return Error("nativemem option is only supported on linux.");
}
// Starts native memory profiling: validates the platform, resets the sampling
// state and makes sure the allocator hooks are installed.
Error MallocTracer::start(Arguments& args) {
    Error error = check(args);
    if (error) {
        return error;
    }

    _interval = args._nativemem > 0 ? args._nativemem : 0;
    _allocated_bytes = 0;

    // initialize() installs the hooks itself on the very first start.
    const bool hooks_installed = initialize();
    if (!hooks_installed && initialized()) {
        // Restart.
        installHooks();
    }

    return Error::OK;
}
// Stops tracing by pointing the patched imports back at the original functions.
void MallocTracer::stop() {
patchLibs(false);
}

57
src/mallocTracer.h Normal file
View File

@@ -0,0 +1,57 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _MALLOCTRACER_H
#define _MALLOCTRACER_H
#include <stdint.h>
#include "engine.h"
#include "event.h"
#include "mutex.h"
#include "trap.h"
// Profiling engine that traces native allocations by redirecting
// malloc/calloc/realloc/free to hook functions.
class MallocTracer : public Engine {
  private:
    // Sampling interval in bytes.
    static u64 _interval;
    // Running byte counter used for interval sampling.
    static volatile u64 _allocated_bytes;

    // Guards import patching.
    static Mutex _patch_lock;
    // Number of native libraries already patched.
    static int _patched_libs;
    // Becomes true on the first successful call to initialize().
    static bool _initialized;

    static bool initialize();
    static bool patchLibs(bool install);

  public:
    const char* type() {
        return "malloc_tracer";
    }

    const char* title() {
        return "Malloc/free profile";
    }

    const char* units() {
        return "bytes";
    }

    Error check(Arguments& args);
    Error start(Arguments& args);
    void stop();

    static bool initialized() {
        return _initialized;
    }

    // Patches the imports of native libraries that have not been patched yet.
    static bool installHooks() {
        return patchLibs(true);
    }

    static void recordMalloc(void* address, size_t size);
    static void recordFree(void* address);
};
#endif // _MALLOCTRACER_H

View File

@@ -15,6 +15,7 @@
#include "perfEvents.h" #include "perfEvents.h"
#include "ctimer.h" #include "ctimer.h"
#include "allocTracer.h" #include "allocTracer.h"
#include "mallocTracer.h"
#include "lockTracer.h" #include "lockTracer.h"
#include "wallClock.h" #include "wallClock.h"
#include "j9ObjectSampler.h" #include "j9ObjectSampler.h"
@@ -46,6 +47,7 @@ static SigAction orig_segvHandler = NULL;
static Engine noop_engine; static Engine noop_engine;
static PerfEvents perf_events; static PerfEvents perf_events;
static AllocTracer alloc_tracer; static AllocTracer alloc_tracer;
static MallocTracer malloc_tracer;
static LockTracer lock_tracer; static LockTracer lock_tracer;
static ObjectSampler object_sampler; static ObjectSampler object_sampler;
static J9ObjectSampler j9_object_sampler; static J9ObjectSampler j9_object_sampler;
@@ -63,7 +65,8 @@ enum EventMask {
EM_CPU = 1, EM_CPU = 1,
EM_ALLOC = 2, EM_ALLOC = 2,
EM_LOCK = 4, EM_LOCK = 4,
EM_WALL = 8 EM_WALL = 8,
EM_NATIVEMEM = 16,
}; };
@@ -89,6 +92,7 @@ static inline int hasNativeStack(EventType event_type) {
(1 << PERF_SAMPLE) | (1 << PERF_SAMPLE) |
(1 << EXECUTION_SAMPLE) | (1 << EXECUTION_SAMPLE) |
(1 << WALL_CLOCK_SAMPLE) | (1 << WALL_CLOCK_SAMPLE) |
(1 << MALLOC_SAMPLE) |
(1 << ALLOC_SAMPLE) | (1 << ALLOC_SAMPLE) |
(1 << ALLOC_OUTSIDE_TLAB); (1 << ALLOC_OUTSIDE_TLAB);
return (1 << event_type) & events_with_native_stack; return (1 << event_type) & events_with_native_stack;
@@ -352,6 +356,9 @@ int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGC
// Skip all internal frames above VM runtime entry for allocation samples // Skip all internal frames above VM runtime entry for allocation samples
depth = 0; depth = 0;
continue; continue;
} else if (mark == MARK_ASYNC_PROFILER && event_type == MALLOC_SAMPLE) {
// Skip all internal frames above the *_hook functions. Include the hook function itself.
depth = 0;
} else if (mark == MARK_INTERPRETER) { } else if (mark == MARK_INTERPRETER) {
// This is C++ interpreter frame, this and later frames should be reported // This is C++ interpreter frame, this and later frames should be reported
// as Java frames returned by AGCT. Terminate the scan here. // as Java frames returned by AGCT. Terminate the scan here.
@@ -671,6 +678,8 @@ u64 Profiler::recordSample(void* ucontext, u64 counter, EventType event_type, Ev
} else { } else {
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx); num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
} }
} else if (event_type == MALLOC_SAMPLE && malloc_tracer.initialized()) {
num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
} else { } else {
// Lock events and instrumentation events can safely call synchronous JVM TI stack walker. // Lock events and instrumentation events can safely call synchronous JVM TI stack walker.
// Skip Instrument.recordSample() method // Skip Instrument.recordSample() method
@@ -784,6 +793,7 @@ void* Profiler::dlopen_hook(const char* filename, int flags) {
void* result = dlopen(filename, flags); void* result = dlopen(filename, flags);
if (result != NULL) { if (result != NULL) {
instance()->updateSymbols(false); instance()->updateSymbols(false);
MallocTracer::installHooks();
} }
return result; return result;
} }
@@ -1028,6 +1038,8 @@ Engine* Profiler::activeEngine() {
return &lock_tracer; return &lock_tracer;
case EM_WALL: case EM_WALL:
return &wall_clock; return &wall_clock;
case EM_NATIVEMEM:
return &malloc_tracer;
default: default:
return _engine; return _engine;
} }
@@ -1072,7 +1084,9 @@ Error Profiler::start(Arguments& args, bool reset) {
_event_mask = (args._event != NULL ? EM_CPU : 0) | _event_mask = (args._event != NULL ? EM_CPU : 0) |
(args._alloc >= 0 ? EM_ALLOC : 0) | (args._alloc >= 0 ? EM_ALLOC : 0) |
(args._lock >= 0 ? EM_LOCK : 0) | (args._lock >= 0 ? EM_LOCK : 0) |
(args._wall >= 0 ? EM_WALL : 0); (args._wall >= 0 ? EM_WALL : 0) |
(args._nativemem >= 0 ? EM_NATIVEMEM : 0);
if (_event_mask == 0) { if (_event_mask == 0) {
return Error("No profiling events specified"); return Error("No profiling events specified");
} else if ((_event_mask & (_event_mask - 1)) && args._output != OUTPUT_JFR) { } else if ((_event_mask & (_event_mask - 1)) && args._output != OUTPUT_JFR) {
@@ -1164,7 +1178,6 @@ Error Profiler::start(Arguments& args, bool reset) {
if (error) { if (error) {
return error; return error;
} }
switchLibraryTrap(true); switchLibraryTrap(true);
if (args._output == OUTPUT_JFR) { if (args._output == OUTPUT_JFR) {
@@ -1200,6 +1213,12 @@ Error Profiler::start(Arguments& args, bool reset) {
goto error4; goto error4;
} }
} }
if (_event_mask & EM_NATIVEMEM) {
error = malloc_tracer.start(args);
if (error) {
goto error5;
}
}
switchThreadEvents(JVMTI_ENABLE); switchThreadEvents(JVMTI_ENABLE);
@@ -1214,6 +1233,9 @@ Error Profiler::start(Arguments& args, bool reset) {
return Error::OK; return Error::OK;
error5:
if (_event_mask & EM_NATIVEMEM) malloc_tracer.stop();
error4: error4:
if (_event_mask & EM_LOCK) lock_tracer.stop(); if (_event_mask & EM_LOCK) lock_tracer.stop();
@@ -1246,6 +1268,7 @@ Error Profiler::stop(bool restart) {
if (_event_mask & EM_WALL) wall_clock.stop(); if (_event_mask & EM_WALL) wall_clock.stop();
if (_event_mask & EM_LOCK) lock_tracer.stop(); if (_event_mask & EM_LOCK) lock_tracer.stop();
if (_event_mask & EM_ALLOC) _alloc_engine->stop(); if (_event_mask & EM_ALLOC) _alloc_engine->stop();
if (_event_mask & EM_NATIVEMEM) malloc_tracer.stop();
_engine->stop(); _engine->stop();
@@ -1289,6 +1312,9 @@ Error Profiler::check(Arguments& args) {
_alloc_engine = selectAllocEngine(args._alloc, args._live); _alloc_engine = selectAllocEngine(args._alloc, args._live);
error = _alloc_engine->check(args); error = _alloc_engine->check(args);
} }
if (!error && args._nativemem >= 0) {
error = malloc_tracer.check(args);
}
if (!error && args._lock >= 0) { if (!error && args._lock >= 0) {
error = lock_tracer.check(args); error = lock_tracer.check(args);
} }
@@ -1754,6 +1780,7 @@ Error Profiler::runInternal(Arguments& args, Writer& out) {
out << "Basic events:\n"; out << "Basic events:\n";
out << " " << EVENT_CPU << "\n"; out << " " << EVENT_CPU << "\n";
out << " " << EVENT_ALLOC << "\n"; out << " " << EVENT_ALLOC << "\n";
out << " " << EVENT_NATIVEMEM << "\n";
out << " " << EVENT_LOCK << "\n"; out << " " << EVENT_LOCK << "\n";
out << " " << EVENT_WALL << "\n"; out << " " << EVENT_WALL << "\n";
out << " " << EVENT_ITIMER << "\n"; out << " " << EVENT_ITIMER << "\n";

View File

@@ -0,0 +1,27 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <jni.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Native implementation of test.nativemem.Native.malloc(int).
 * The size parameter is jint to match the Java declaration; reading an int
 * argument as jlong would pick up undefined upper bits.
 * Returns the allocated address as a jlong (0 on failure).
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_malloc(JNIEnv* env, jclass clazz, jint size) {
    void* ptr = malloc((size_t)size);
    return (jlong)(intptr_t)ptr;
}
/*
 * Native implementation of test.nativemem.Native.calloc(long, int).
 * Parameter types mirror the Java declaration: num is a long, size is an int.
 * Returns the allocated address as a jlong (0 on failure).
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_calloc(JNIEnv* env, jclass clazz, jlong num, jint size) {
    void* ptr = calloc((size_t)num, (size_t)size);
    return (jlong)(intptr_t)ptr;
}
/*
 * Native implementation of test.nativemem.Native.realloc(long, int).
 * Parameter types mirror the Java declaration: addr is a long, size is an int.
 * Returns the address of the resized block as a jlong.
 */
JNIEXPORT jlong JNICALL Java_test_nativemem_Native_realloc(JNIEnv* env, jclass clazz, jlong addr, jint size) {
    void* ptr = realloc((void*)(intptr_t)addr, (size_t)size);
    return (jlong)(intptr_t)ptr;
}
JNIEXPORT void JNICALL Java_test_nativemem_Native_free(JNIEnv* env, jclass clazz, jlong addr) {
free((void*)(intptr_t)addr);
}

View File

@@ -68,6 +68,8 @@ struct TestCase {
#define __ASSERT_OR_CHECK_OP(isAssert, val1, op, val2) \ #define __ASSERT_OR_CHECK_OP(isAssert, val1, op, val2) \
{ \ { \
_Pragma("GCC diagnostic push"); \
_Pragma("GCC diagnostic ignored \"-Waddress\""); \
const bool is_string = \ const bool is_string = \
std::is_same<decltype(val1), const char*>::value || std::is_same<decltype(val1), char*>::value || \ std::is_same<decltype(val1), const char*>::value || std::is_same<decltype(val1), char*>::value || \
std::is_same<decltype(val2), const char*>::value || std::is_same<decltype(val2), char*>::value; \ std::is_same<decltype(val2), const char*>::value || std::is_same<decltype(val2), char*>::value; \
@@ -96,6 +98,7 @@ struct TestCase {
} else { \ } else { \
test_case.assertion_count++; \ test_case.assertion_count++; \
} \ } \
_Pragma("GCC diagnostic pop"); \
} }
// ASSERT stops execution after a failure. // ASSERT stops execution after a failure.

View File

@@ -221,7 +221,6 @@ public class Runner {
List<RunnableTest> allTests = getRunnableTests(args); List<RunnableTest> allTests = getRunnableTests(args);
final int testCount = allTests.size(); final int testCount = allTests.size();
int i = 1; int i = 1;
long totalTestDuration = 0; long totalTestDuration = 0;
List<String> failedTests = new ArrayList<>(); List<String> failedTests = new ArrayList<>();

View File

@@ -26,6 +26,8 @@ public @interface Test {
String jvmArgs() default ""; String jvmArgs() default "";
String[] env() default {};
boolean debugNonSafepoints() default false; boolean debugNonSafepoints() default false;
boolean output() default false; boolean output() default false;

View File

@@ -77,6 +77,14 @@ public class TestProcess implements Closeable {
if (test.error()) { if (test.error()) {
pb.redirectError(createTempFile(STDERR)); pb.redirectError(createTempFile(STDERR));
} }
for (String env : test.env()) {
String[] keyValue = env.split("=", 2);
if (keyValue.length == 2) {
pb.environment().put(keyValue[0], keyValue[1]);
}
}
this.p = pb.start(); this.p = pb.start();
if (cmd.get(0).endsWith("java")) { if (cmd.get(0).endsWith("java")) {
@@ -118,6 +126,7 @@ public class TestProcess implements Closeable {
cmd.add("-XX:+UnlockDiagnosticVMOptions"); cmd.add("-XX:+UnlockDiagnosticVMOptions");
cmd.add("-XX:+DebugNonSafepoints"); cmd.add("-XX:+DebugNonSafepoints");
} }
cmd.add("-Djava.library.path=" + System.getProperty("java.library.path"));
addArgs(cmd, test.jvmArgs()); addArgs(cmd, test.jvmArgs());
if (!test.agentArgs().isEmpty()) { if (!test.agentArgs().isEmpty()) {
cmd.add("-agentpath:" + profilerLibPath() + "=" + cmd.add("-agentpath:" + profilerLibPath() + "=" +

View File

@@ -0,0 +1,47 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package test.nativemem;
/**
 * Test program: several threads repeatedly allocate, reallocate and free
 * native memory, releasing every block they obtain.
 * Pass "once" as the first argument to run a single iteration per thread.
 */
public class CallsAllNoLeak {

    private static final int NUM_THREADS = 8; // Number of threads

    private static final int MALLOC_SIZE = 1999993; // Prime size, useful in assertions.
    private static final int CALLOC_SIZE = 2000147;
    private static final int REALLOC_SIZE = 30000170;

    /** Runs the allocate/free cycle once, or until the thread is interrupted. */
    private static void do_work(boolean once) {
        try {
            while (true) {
                // malloc -> realloc -> free
                long block = Native.malloc(MALLOC_SIZE);
                block = Native.realloc(block, REALLOC_SIZE);
                Native.free(block);

                // calloc -> free
                long zeroed = Native.calloc(1, CALLOC_SIZE);
                Native.free(zeroed);

                Thread.sleep(1);
                if (once) {
                    break;
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.err.println("Thread interrupted: " + Thread.currentThread().getName());
        }
    }

    public static void main(String[] args) throws InterruptedException {
        final boolean once = args.length > 0 && args[0].equals("once");

        final Thread[] workers = new Thread[NUM_THREADS];
        for (int n = 0; n < workers.length; n++) {
            Thread worker = new Thread(() -> do_work(once), "MemoryTask-" + n);
            workers[n] = worker;
            worker.start();
        }

        for (Thread worker : workers) {
            worker.join();
        }
    }
}

View File

@@ -0,0 +1,24 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package test.nativemem;
/**
 * Test program: allocates native memory with malloc and calloc and never
 * frees it. Pass "once" as the first argument to run a single iteration.
 */
public class CallsMallocCalloc {

    private static final int MALLOC_SIZE = 1999993; // Prime size, useful in assertions.
    private static final int CALLOC_SIZE = 2000147;

    public static void main(String[] args) throws InterruptedException {
        final boolean once = args.length > 0 && args[0].equals("once");

        while (true) {
            Native.malloc(MALLOC_SIZE);
            Native.calloc(1, CALLOC_SIZE);

            // allocate every 1 second.
            Thread.sleep(1000);
            if (once) {
                break;
            }
        }
    }
}

View File

@@ -0,0 +1,24 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package test.nativemem;
/**
 * Test workload that allocates a block with malloc and then resizes it with
 * realloc through the {@link Native} JNI bindings. The resized block is never
 * freed.
 *
 * <p>Passing {@code once} as the first program argument performs a single
 * malloc/realloc pair; otherwise a pair is issued roughly once per second
 * until the process is stopped.
 */
public class CallsRealloc {

    // Prime size, useful in assertions.
    private static final int MALLOC_SIZE = 1999993;
    private static final int REALLOC_SIZE = 30000170;

    /**
     * @param args optional; {@code args[0] == "once"} selects single-iteration mode
     * @throws InterruptedException if the sleep between iterations is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        final boolean runOnce = args.length >= 1 && "once".equals(args[0]);

        for (;;) {
            // The address returned by malloc is handed straight to realloc;
            // the address realloc returns is dropped.
            Native.realloc(Native.malloc(MALLOC_SIZE), REALLOC_SIZE);

            // Pace the allocations: one pair per second.
            Thread.sleep(1000);

            // Checked after the sleep, so single-iteration mode still waits one second.
            if (runOnce) {
                return;
            }
        }
    }
}

View File

@@ -0,0 +1,20 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package test.nativemem;
/**
 * JNI entry points used by the nativemem test workloads to trigger native
 * allocator calls from Java.
 *
 * <p>The implementations live in the {@code jnimalloc} shared library, which the
 * test build produces from {@code test/native/libs/jnimalloc.c} and exposes to
 * the JVM through {@code java.library.path}. The tests look for the exported
 * symbols by name in collected stacks (for example
 * {@code Java_test_nativemem_Native_malloc}), so the package, class and method
 * names here are part of the contract and must not be renamed.
 *
 * <p>Addresses cross the JNI boundary as {@code long} values: callers pass the
 * value returned by {@link #malloc} or {@link #calloc} back into
 * {@link #realloc} and {@link #free}.
 */
public class Native {
// Bind the native methods below as soon as the class is initialized.
static {
System.loadLibrary("jnimalloc");
}
/**
 * Requests a native block of {@code size} bytes.
 * Presumably forwards to libc malloc; confirm against jnimalloc.c.
 *
 * @param size requested size in bytes
 * @return the block address as a long
 */
public static native long malloc(int size);
/**
 * Resizes a block previously returned by this class.
 * Presumably forwards to libc realloc; confirm against jnimalloc.c.
 *
 * @param addr address of the existing block
 * @param size new size in bytes
 * @return the address of the resized block as a long
 */
public static native long realloc(long addr, int size);
/**
 * Requests a native block for {@code num} elements of {@code size} bytes each.
 * Presumably forwards to libc calloc; confirm against jnimalloc.c.
 *
 * @param num number of elements
 * @param size size of each element in bytes
 * @return the block address as a long
 */
public static native long calloc(long num, int size);
/**
 * Releases a block previously returned by this class.
 * NOTE(review): the meaning of the returned long is not visible from the Java
 * side, and the callers in this package ignore it; confirm against jnimalloc.c.
 *
 * @param addr address of the block to release
 */
public static native long free(long addr);
}

View File

@@ -0,0 +1,125 @@
/*
* Copyright The async-profiler authors
* SPDX-License-Identifier: Apache-2.0
*/
package test.nativemem;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import one.jfr.JfrReader;
import one.jfr.StackTrace;
import one.jfr.event.MallocEvent;
import one.profiler.test.Assert;
import one.profiler.test.Os;
import one.profiler.test.Output;
import one.profiler.test.Test;
import one.profiler.test.TestProcess;
public class NativememTests {
private static final int MALLOC_SIZE = 1999993;
private static final int CALLOC_SIZE = 2000147;
private static final int REALLOC_SIZE = 30000170;
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX, agentArgs = "start,nativemem,total,collapsed,file=%f", args = "once")
public void canAgentTraceMallocCalloc(TestProcess p) throws Exception {
Output out = p.waitForExit("%f");
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), MALLOC_SIZE);
Assert.isEqual(out.samples("Java_test_nativemem_Native_calloc"), CALLOC_SIZE);
}
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX, agentArgs = "start,nativemem=10000000,total,collapsed,file=%f", args = "once")
public void canAgentFilterMallocCalloc(TestProcess p) throws Exception {
Output out = p.waitForExit("%f");
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), 0);
Assert.isEqual(out.samples("Java_test_nativemem_Native_calloc"), 0);
}
@Test(mainClass = CallsMallocCalloc.class, os = Os.LINUX)
public void canAsprofTraceMallocCalloc(TestProcess p) throws Exception {
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
long samplesCalloc = out.samples("Java_test_nativemem_Native_calloc");
Assert.isGreater(samplesMalloc, 0);
Assert.isGreater(samplesCalloc, 0);
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
Assert.isEqual(samplesCalloc % CALLOC_SIZE, 0);
}
@Test(mainClass = CallsRealloc.class, agentArgs = "start,nativemem,total,collapsed,file=%f", args = "once", os = Os.LINUX)
public void canAgentTraceRealloc(TestProcess p) throws Exception {
Output out = p.waitForExit("%f");
Assert.isEqual(out.samples("Java_test_nativemem_Native_malloc"), MALLOC_SIZE);
Assert.isEqual(out.samples("Java_test_nativemem_Native_realloc"), REALLOC_SIZE);
}
@Test(mainClass = CallsRealloc.class, os = Os.LINUX)
public void canAsprofTraceRealloc(TestProcess p) throws Exception {
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
long samplesRealloc = out.samples("Java_test_nativemem_Native_realloc");
Assert.isGreater(samplesMalloc, 0);
Assert.isGreater(samplesRealloc, 0);
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
Assert.isEqual(samplesRealloc % REALLOC_SIZE, 0);
}
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX)
public void canAsprofTraceAllNoLeak(TestProcess p) throws Exception {
Output out = p.profile("-e nativemem --total -o collapsed -d 2");
long samplesMalloc = out.samples("Java_test_nativemem_Native_malloc");
long samplesCalloc = out.samples("Java_test_nativemem_Native_calloc");
long samplesRealloc = out.samples("Java_test_nativemem_Native_realloc");
Assert.isGreater(samplesMalloc, 0);
Assert.isGreater(samplesCalloc, 0);
Assert.isGreater(samplesRealloc, 0);
Assert.isEqual(samplesMalloc % MALLOC_SIZE, 0);
Assert.isEqual(samplesCalloc % CALLOC_SIZE, 0);
Assert.isEqual(samplesRealloc % REALLOC_SIZE, 0);
}
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem,file=%f.jfr")
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem,total,file=%f.jfr")
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem=1,total,file=%f.jfr")
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,nativemem=10M,total,file=%f.jfr")
@Test(mainClass = CallsAllNoLeak.class, os = Os.LINUX, args = "once", agentArgs = "start,cpu,alloc,nativemem,total,file=%f.jfr")
public void livenessJfrHasStacks(TestProcess p) throws Exception {
p.waitForExit();
String filename = p.getFile("%f").toPath().toString();
try (JfrReader r = new JfrReader(filename)) {
List<MallocEvent> events = r.readAllEvents(MallocEvent.class);
assert !events.isEmpty() : "No MallocEvent events found in the JFR output";
long totalAllocated = 0;
Map<Long, MallocEvent> addresses = new HashMap<>();
for (MallocEvent event : events) {
// only interested in specific sizes.
if (event.size != 0 && event.size != MALLOC_SIZE && event.size != CALLOC_SIZE
&& event.size != REALLOC_SIZE) {
continue;
}
totalAllocated += event.size;
if (event.size > 0) {
addresses.put(event.address, event);
} else {
addresses.remove(event.address);
}
}
Assert.isGreater(totalAllocated, 0);
Assert.isEqual(addresses.size(), 0);
}
}
}