Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion courses/02_intermediate/.latexmkrc
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
$xelatex = 'xelatex -shell-escape %O %S'
$xelatex = 'xelatex -shell-escape %O %S';

# List of extensions latexmk can safely clean (they correspond to files created by the Beamer class)
$clean_ext = "nav snm vrb";
256 changes: 255 additions & 1 deletion courses/02_intermediate/main.tex
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
% !TeX program = xelatex

% Create command `createHandout` only if it doesn't already exist.
% To compile in presentation mode, define `createHandout` beforehand as a command that expands to nothing.
\providecommand{\createHandout}{handout}

\documentclass[
aspectratio=169,
handout,
\createHandout
]{beamer}

\usepackage{minted}
Expand Down Expand Up @@ -127,6 +131,252 @@ \section{Profiling and debugging}

% _____________________________________________________________________________

\begin{frame}{Profiling and debugging tools at hand}
\begin{columns}[T]
\begin{column}{0.33\linewidth}
Kokkos

\begin{itemize}
\item KokkosP interface
\item Regions
\end{itemize}

\vspace{1em}

\structure{Note:} Not actual tools, but used by tools of the two other categories
\end{column}
\begin{column}{0.33\linewidth}
Kokkos tools

\begin{itemize}
\item Kernel timer
\item Kernel logger
\item Memory usage
\item Memory events
\item Space time stack
\end{itemize}
\end{column}
\begin{column}{0.33\linewidth}
Third-party tools

\begin{itemize}
\item VTune
\item Nsight Systems
\item Nsight Compute
\item Tau
\item Timemory
\item Caliper
\item HPCToolkit
\end{itemize}
\end{column}
\end{columns}
\end{frame}

% _____________________________________________________________________________

\begin{frame}{KokkosP interface}
\begin{itemize}
\item Provided by Kokkos
\item Hooks
\begin{itemize}
\item Parallel constructs
\item Fences
\end{itemize}
\item Designed for other tools to use it
\item Always available
\item No overhead if no tools are used
\end{itemize}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Regions}
\begin{columns}
\begin{column}{0.6\linewidth}
\begin{minted}{C++}
Kokkos::Profiling::pushRegion("init");

Kokkos::parallel_for(
"initialize A",
N,
KOKKOS_LAMBDA(int i) {
view_a(i) = i;
}
);

Kokkos::Profiling::popRegion();
\end{minted}
\end{column}
\begin{column}{0.4\linewidth}
\begin{itemize}
\item Set regions of interest in your code
\item Provided by Kokkos
\item No specific header needed
\item Namespace \texttt{Kokkos::Profiling}
\item \texttt{pushRegion} and \texttt{popRegion} to create a region
\end{itemize}
\end{column}
\end{columns}
\end{frame}

% _____________________________________________________________________________

\begin{frame}{Kokkos tools}
\begin{itemize}
\item \githublink{\url{https://github.com/kokkos/kokkos-tools}}
\item Has a different version number than Kokkos
\item Should be built and installed somewhere
\item Use one tool at a time with the environment variable \texttt{KOKKOS\_TOOLS\_LIBS}
\item Do not ship it within your program (this is a dev tool!)
\end{itemize}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Kernel timer for a basic profiling}
\begin{columns}
\begin{column}{0.6\linewidth}
\begin{minted}[breakafter=/]{sh}
export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_kernel_timer.so

./my_program
kp_reader ./name_of_report.dat
\end{minted}
\end{column}
\begin{column}{0.4\linewidth}
\begin{itemize}
\item Simple tool for a basic timing analysis
\item Export environment variable to use the tool
\item Run the program as usual
\item Analyze the generated data with the provided \texttt{kp\_reader} program
\end{itemize}
\end{column}
\end{columns}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Kernel timer output}
\begin{minted}[fontsize=\scriptsize,breaklines=false]{text}
(Type) Total Time, Call Count, Avg. Time per Call, %Total Time in Kernels, %Total Program Time
-------------------------------------------------------------------------
Regions:
...
-------------------------------------------------------------------------
Kernels:
...
-------------------------------------------------------------------------
Summary:

Total Execution Time (incl. Kokkos + non-Kokkos): 0.20500 seconds
Total Time in Kokkos kernels: 0.11500 seconds
-> Time outside Kokkos kernels: 0.09000 seconds
-> Percentage in Kokkos kernels: 55.98 %
Total Calls to Kokkos Kernels: 3

-------------------------------------------------------------------------
\end{minted}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Kernel timer output (for regions)}
\begin{minted}[fontsize=\scriptsize,breaklines=false]{text}
(Type) Total Time, Call Count, Avg. Time per Call, %Total Time in Kernels, %Total Program Time
-------------------------------------------------------------------------

Regions:

- mirror and copy
(REGION) 0.092400 1 0.092400 80.448319 45.038345
- initialization
(REGION) 0.051400 1 0.051400 44.748858 25.052289
\end{minted}

\begin{itemize}
\item Regions are named after what you set in \texttt{pushRegion}
\item One set of two lines per region
\end{itemize}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Kernel timer output (for kernels)}
\begin{minted}[fontsize=\scriptsize,breaklines=false]{text}
(Type) Total Time, Call Count, Avg. Time per Call, %Total Time in Kernels, %Total Program Time

-------------------------------------------------------------------------
Kernels:

- Kokkos::View::initialization [vector_mirror] via memset
(ParFor) 0.064600 1 0.064600 56.226650 31.478039
- Initialize
(ParFor) 0.048400 1 0.048400 42.133665 23.588194
- Kokkos::View::initialization [vector] via memset
(ParFor) 0.001900 1 0.001900 1.639685 0.917964
\end{minted}

\begin{itemize}
\item Kernels are named after what you set in \texttt{parallel\_*}, or have default names
\item One set of two lines per kernel
\end{itemize}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{Kernel logger for a basic debugging}
\begin{columns}
\begin{column}{0.6\linewidth}
\begin{minted}[breakafter=/]{sh}
export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_kernel_logger.so

./my_program
\end{minted}
\end{column}
\begin{column}{0.4\linewidth}
\begin{itemize}
\item Simple tool for basic debugging
\item Export environment variable to use the tool
\item Run the program as usual
\item Read the log of kernel and region events printed on the standard output while the program runs
\end{itemize}
\end{column}
\end{columns}
\end{frame}

% _____________________________________________________________________________

\begin{frame}[fragile]{NVTX connector for NVIDIA tools}
\begin{columns}
\begin{column}{0.6\linewidth}
\begin{minted}[breakafter=/]{sh}
export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_nvtx_connector.so

# nsight systems
nsys profile -o report ./my_program
nsys-ui report.nsys-rep

# nsight compute
ncu -o report ./my_program
\end{minted}
\end{column}
\begin{column}{0.4\linewidth}
\begin{itemize}
\item Simple tool that makes Kokkos named regions/kernels appear in NVIDIA tools reports
\begin{itemize}
\item Nsight Systems
\item Nsight Compute
\end{itemize}
\item Export environment variable to use the tool
\item Run the program as usual
\item Open the generated report in either tool
\end{itemize}
\end{column}
\end{columns}
\end{frame}

% _____________________________________________________________________________

\section{Subviews}

% _____________________________________________________________________________
Expand All @@ -139,6 +389,10 @@ \section{Layouts}

% _____________________________________________________________________________

\section{Subviews}

% _____________________________________________________________________________

\section{Scatter Views}

\end{document}