Skip to content

Commit 540a807

Browse files
Redo: Add profiling of already running tasks via SIGINFO/SIGUSR1 (#44185)
* Redo: Add profiling of already running tasks via SIGINFO/SIGUSR1 * fix precompile script
1 parent 7cd11a6 commit 540a807

File tree

10 files changed

+275
-41
lines changed

10 files changed

+275
-41
lines changed

NEWS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,17 @@ Standard library changes
146146
* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).
147147

148148
#### Profile
149-
* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
149+
* CPU profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
150150
grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
151151
Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
152152
each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
153153
profiling data consumers, but can be included with the `include_meta` kwarg. ([#41742])
154154
* The new `Profile.Allocs` module allows memory allocations to be profiled. The stack trace, type, and size of each
155155
allocation is recorded, and a `sample_rate` argument allows a tunable amount of allocations to be skipped,
156156
reducing performance overhead. ([#42768])
157+
* A fixed duration cpu profile can now be triggered by the user during running tasks without `Profile` being loaded
158+
first and the report will show during execution. On MacOS & FreeBSD press `ctrl-t` or raise a `SIGINFO`.
159+
For other platforms raise a `SIGUSR1` i.e. `% kill -USR1 $julia_pid`. Not currently available on windows. ([#43179])
157160

158161
#### Random
159162

contrib/generate_precompile.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ Profile = get(Base.loaded_modules,
221221
Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"),
222222
nothing)
223223
if Profile !== nothing
224+
repl_script *= Profile.precompile_script
224225
hardcoded_precompile_statements *= """
225226
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
226227
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})

src/julia_internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
128128
// Returns time in nanosec
129129
JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT;
130130

131+
JL_DLLEXPORT void jl_set_peek_cond(uintptr_t);
132+
JL_DLLEXPORT double jl_get_profile_peek_duration(void);
133+
JL_DLLEXPORT void jl_set_profile_peek_duration(double);
134+
131135
// number of cycles since power-on
132136
static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
133137
{

src/signal-handling.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,35 @@ static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len)
129129
return i;
130130
}
131131

132+
static double profile_autostop_time = -1.0;
133+
static double profile_peek_duration = 1.0; // seconds
134+
135+
double jl_get_profile_peek_duration(void) {
136+
return profile_peek_duration;
137+
}
138+
void jl_set_profile_peek_duration(double t) {
139+
profile_peek_duration = t;
140+
return;
141+
}
142+
143+
uintptr_t profile_show_peek_cond_loc;
144+
JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond)
145+
{
146+
profile_show_peek_cond_loc = cond;
147+
return;
148+
}
149+
150+
static void jl_check_profile_autostop(void) {
151+
if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) {
152+
profile_autostop_time = -1.0;
153+
jl_profile_stop_timer();
154+
jl_safe_printf("\n==============================================================\n");
155+
jl_safe_printf("Profile collected. A report will print at the next yield point\n");
156+
jl_safe_printf("==============================================================\n\n");
157+
uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
158+
}
159+
}
160+
132161
#if defined(_WIN32)
133162
#include "signals-win.c"
134163
#else

src/signals-mach.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ void *mach_profile_listener(void *arg)
643643
}
644644
jl_unlock_profile_mach(0, keymgr_locked);
645645
if (running) {
646+
jl_check_profile_autostop();
646647
// Reset the alarm
647648
kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
648649
HANDLE_MACH_ERROR("clock_alarm", ret)

src/signals-unix.c

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ bt_context_t *jl_to_bt_context(void *sigctx)
6161
#endif
6262
}
6363

64-
6564
static int thread0_exit_count = 0;
6665
static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size);
6766

@@ -519,17 +518,22 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
519518
sigprof.sigev_notify = SIGEV_SIGNAL;
520519
sigprof.sigev_signo = SIGUSR1;
521520
sigprof.sigev_value.sival_ptr = &timerprof;
522-
if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1)
521+
// Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
522+
running = 1;
523+
if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) {
524+
running = 0;
523525
return -2;
526+
}
524527

525528
// Start the timer
526529
itsprof.it_interval.tv_sec = 0;
527530
itsprof.it_interval.tv_nsec = 0;
528531
itsprof.it_value.tv_sec = nsecprof / GIGA;
529532
itsprof.it_value.tv_nsec = nsecprof % GIGA;
530-
if (timer_settime(timerprof, 0, &itsprof, NULL) == -1)
533+
if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) {
534+
running = 0;
531535
return -3;
532-
running = 1;
536+
}
533537
return 0;
534538
}
535539

@@ -648,6 +652,18 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig)
648652
}
649653
#endif
650654

655+
void trigger_profile_peek(void)
656+
{
657+
jl_safe_printf("\n======================================================================================\n");
658+
jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration);
659+
jl_safe_printf("======================================================================================\n");
660+
bt_size_cur = 0; // clear profile buffer
661+
if (jl_profile_start_timer() < 0)
662+
jl_safe_printf("ERROR: Could not start profile timer\n");
663+
else
664+
profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9);
665+
}
666+
651667
static void *signal_listener(void *arg)
652668
{
653669
static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1];
@@ -753,11 +769,17 @@ static void *signal_listener(void *arg)
753769

754770
int doexit = critical;
755771
#ifdef SIGINFO
756-
if (sig == SIGINFO)
772+
if (sig == SIGINFO) {
773+
if (running != 1)
774+
trigger_profile_peek();
757775
doexit = 0;
776+
}
758777
#else
759-
if (sig == SIGUSR1)
778+
if (sig == SIGUSR1) {
779+
if (running != 1)
780+
trigger_profile_peek();
760781
doexit = 0;
782+
}
761783
#endif
762784

763785
bt_size = 0;
@@ -835,6 +857,7 @@ static void *signal_listener(void *arg)
835857
}
836858
#ifndef HAVE_MACH
837859
if (profile && running) {
860+
jl_check_profile_autostop();
838861
#if defined(HAVE_TIMER)
839862
timer_settime(timerprof, 0, &itsprof, NULL);
840863
#elif defined(HAVE_ITIMER)

src/signals-win.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
388388
jl_gc_debug_critical_error();
389389
abort();
390390
}
391+
jl_check_profile_autostop();
391392
}
392393
}
393394
}

stdlib/Profile/docs/src/index.md

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,79 @@
22

33
## CPU Profiling
44

5+
There are two main approaches to CPU profiling julia code:
6+
7+
## Via `@profile`
8+
9+
Where profiling is enabled for a given call via the `@profile` macro.
10+
11+
```julia-repl
12+
julia> using Profile
13+
14+
julia> @profile foo()
15+
16+
julia> Profile.print()
17+
Overhead ╎ [+additional indent] Count File:Line; Function
18+
=========================================================
19+
╎147 @Base/client.jl:506; _start()
20+
╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions)
21+
...
22+
```
23+
24+
## Triggered During Execution
25+
26+
Tasks that are already running can also be profiled for a fixed time period at any user-triggered time.
27+
28+
To trigger the profiling:
29+
- MacOS & FreeBSD (BSD-based platforms): Use `ctrl-t` or pass a `SIGINFO` signal to the julia process i.e. `% kill -INFO $julia_pid`
30+
- Linux: Pass a `SIGUSR1` signal to the julia process i.e. `% kill -USR1 $julia_pid`
31+
- Windows: Not currently supported.
32+
33+
First, a single stack trace at the instant that the signal was thrown is shown, then a 1 second profile is collected,
34+
followed by the profile report at the next yield point, which may be at task completion for code without yield points
35+
e.g. tight loops.
36+
37+
```julia-repl
38+
julia> foo()
39+
##== the user sends a trigger while foo is running ==##
40+
load: 2.53 cmd: julia 88903 running 6.16u 0.97s
41+
42+
======================================================================================
43+
Information request received. A stacktrace will print followed by a 1.0 second profile
44+
======================================================================================
45+
46+
signal (29): Information request: 29
47+
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
48+
_pthread_cond_wait at /usr/lib/system/libsystem_pthread.dylib (unknown line)
49+
...
50+
51+
======================================================================
52+
Profile collected. A report will print if the Profile module is loaded
53+
======================================================================
54+
55+
Overhead ╎ [+additional indent] Count File:Line; Function
56+
=========================================================
57+
Thread 1 Task 0x000000011687c010 Total snapshots: 572. Utilization: 100%
58+
╎147 @Base/client.jl:506; _start()
59+
╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions)
60+
...
61+
62+
Thread 2 Task 0x0000000116960010 Total snapshots: 572. Utilization: 0%
63+
╎572 @Base/task.jl:587; task_done_hook(t::Task)
64+
╎ 572 @Base/task.jl:879; wait()
65+
...
66+
```
67+
68+
### Customization
69+
70+
The duration of the profiling can be adjusted via [`Profile.set_peek_duration`](@ref)
71+
72+
The profile report is broken down by thread and task. Pass a no-arg function to `Profile.peek_report[]` to override this.
73+
i.e. `Profile.peek_report[] = () -> Profile.print()` to remove any grouping. This could also be overridden by an external
74+
profile data consumer.
75+
76+
## Reference
77+
578
```@docs
679
Profile.@profile
780
```
@@ -16,6 +89,8 @@ Profile.fetch
1689
Profile.retrieve
1790
Profile.callers
1891
Profile.clear_malloc_data
92+
Profile.get_peek_duration
93+
Profile.set_peek_duration
1994
```
2095

2196
## Memory profiling

0 commit comments

Comments
 (0)