diff --git a/courses/02_intermediate/.latexmkrc b/courses/02_intermediate/.latexmkrc index 55126bd..de371e4 100644 --- a/courses/02_intermediate/.latexmkrc +++ b/courses/02_intermediate/.latexmkrc @@ -1 +1,4 @@ -$xelatex = 'xelatex -shell-escape %O %S' +$xelatex = 'xelatex -shell-escape %O %S'; + +# List of extensions latexmk can safely clean (they correspond to files created by the Beamer class) +$clean_ext = "nav snm vrb"; diff --git a/courses/02_intermediate/main.tex b/courses/02_intermediate/main.tex index bdf15bb..4499c51 100644 --- a/courses/02_intermediate/main.tex +++ b/courses/02_intermediate/main.tex @@ -1,8 +1,12 @@ % !TeX program = xelatex +% Create command `createHandout` only if it doesn't already exist. +% To compile in presentation mode, predefine `createHandout` as a command that does nothing. +\providecommand{\createHandout}{handout} + \documentclass[ aspectratio=169, - handout, + \createHandout ]{beamer} \usepackage{minted} @@ -127,6 +131,252 @@ \section{Profiling and debugging} % _____________________________________________________________________________ +\begin{frame}{Profiling and debugging tools at hand} + \begin{columns}[T] + \begin{column}{0.33\linewidth} + Kokkos + + \begin{itemize} + \item KokkosP interface + \item Regions + \end{itemize} + + \vspace{1em} + + \structure{Note:} Not actual tools, but used by tools of the two other categories + \end{column} + \begin{column}{0.33\linewidth} + Kokkos tools + + \begin{itemize} + \item Kernel timer + \item Kernel logger + \item Memory usage + \item Memory events + \item Space time stack + \end{itemize} + \end{column} + \begin{column}{0.33\linewidth} + Third-party tools + + \begin{itemize} + \item VTune + \item Nsight Systems + \item Nsight Compute + \item Tau + \item Timemory + \item Caliper + \item HPCToolkit + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}{KokkosP interface} + \begin{itemize} + \item 
Provided by Kokkos + \item Hooks + \begin{itemize} + \item Parallel constructs + \item Fences + \end{itemize} + \item Designed to be used by other tools + \item Always available + \item No overhead if no tools are used + \end{itemize} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Regions} + \begin{columns} + \begin{column}{0.6\linewidth} + \begin{minted}{C++} + Kokkos::Profiling::pushRegion("init"); + + Kokkos::parallel_for( + "initialize A", + N, + KOKKOS_LAMBDA(int i) { + view_a(i) = i; + } + ); + + Kokkos::Profiling::popRegion(); + \end{minted} + \end{column} + \begin{column}{0.4\linewidth} + \begin{itemize} + \item Set regions of interest in your code + \item Provided by Kokkos + \item No specific header needed + \item Namespace \texttt{Kokkos::Profiling} + \item \texttt{pushRegion} and \texttt{popRegion} to create a region + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}{Kokkos tools} + \begin{itemize} + \item \githublink{\url{https://github.com/kokkos/kokkos-tools}} + \item Has a different version number than Kokkos + \item Should be built and installed somewhere + \item Use one tool at a time with the environment variable \texttt{KOKKOS\_TOOLS\_LIBS} + \item Do not ship it within your program (this is a dev tool!) 
+ \end{itemize} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Kernel timer for a basic profiling} + \begin{columns} + \begin{column}{0.6\linewidth} + \begin{minted}[breakafter=/]{sh} + export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_kernel_timer.so + + ./my_program + kp_reader ./name_of_report.dat + \end{minted} + \end{column} + \begin{column}{0.4\linewidth} + \begin{itemize} + \item Simple tool for a basic timing analysis + \item Export environment variable to use the tool + \item Run the program as usual + \item Analyze the generated data with the provided \texttt{kp\_reader} program + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Kernel timer output} + \begin{minted}[fontsize=\scriptsize,breaklines=false]{text} + (Type) Total Time, Call Count, Avg. Time per Call, %Total Time in Kernels, %Total Program Time + ------------------------------------------------------------------------- + Regions: + ... + ------------------------------------------------------------------------- + Kernels: + ... + ------------------------------------------------------------------------- + Summary: + + Total Execution Time (incl. Kokkos + non-Kokkos): 0.20500 seconds + Total Time in Kokkos kernels: 0.11500 seconds + -> Time outside Kokkos kernels: 0.09000 seconds + -> Percentage in Kokkos kernels: 55.98 % + Total Calls to Kokkos Kernels: 3 + + ------------------------------------------------------------------------- + \end{minted} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Kernel timer output (for regions)} + \begin{minted}[fontsize=\scriptsize,breaklines=false]{text} + (Type) Total Time, Call Count, Avg. 
Time per Call, %Total Time in Kernels, %Total Program Time + ------------------------------------------------------------------------- + + Regions: + + - mirror and copy + (REGION) 0.092400 1 0.092400 80.448319 45.038345 + - initialization + (REGION) 0.051400 1 0.051400 44.748858 25.052289 + \end{minted} + + \begin{itemize} + \item Regions are named after what you set in \texttt{pushRegion} + \item One set of two lines per region + \end{itemize} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Kernel timer output (for kernels)} + \begin{minted}[fontsize=\scriptsize,breaklines=false]{text} + (Type) Total Time, Call Count, Avg. Time per Call, %Total Time in Kernels, %Total Program Time + + ------------------------------------------------------------------------- + Kernels: + + - Kokkos::View::initialization [vector_mirror] via memset + (ParFor) 0.064600 1 0.064600 56.226650 31.478039 + - Initialize + (ParFor) 0.048400 1 0.048400 42.133665 23.588194 + - Kokkos::View::initialization [vector] via memset + (ParFor) 0.001900 1 0.001900 1.639685 0.917964 + \end{minted} + + \begin{itemize} + \item Kernels are named after what you set in \texttt{parallel\_*}, or have default names + \item One set of two lines per kernel + \end{itemize} +\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{Kernel logger for a basic debugging} + \begin{columns} + \begin{column}{0.6\linewidth} + \begin{minted}[breakafter=/]{sh} + export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_kernel_logger.so + + ./my_program + \end{minted} + \end{column} + \begin{column}{0.4\linewidth} + \begin{itemize} + \item Simple tool for basic debugging + \item Export environment variable to use the tool + \item Run the program as usual + \item Read the kernel and region events logged to the standard output while the program runs + \end{itemize} + \end{column} + \end{columns} 
+\end{frame} + +% _____________________________________________________________________________ + +\begin{frame}[fragile]{NVTX connector for NVIDIA tools} + \begin{columns} + \begin{column}{0.6\linewidth} + \begin{minted}[breakafter=/]{sh} + export KOKKOS_TOOLS_LIBS=/absolute/path/to/libkp_nvtx_connector.so + + # nsight systems + nsys profile -o report ./my_program + nsys-ui report.nsys-rep + + # nsight compute + ncu -o report ./my_program + \end{minted} + \end{column} + \begin{column}{0.4\linewidth} + \begin{itemize} + \item Simple tool to convert Kokkos named regions/kernels to appear on NVIDIA tools reports + \begin{itemize} + \item Nsight Systems + \item Nsight Compute + \end{itemize} + \item Export environment variable to use the tool + \item Run the program as usual + \item Open the generated report in either tool + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +% _____________________________________________________________________________ + \section{Subviews} % _____________________________________________________________________________ @@ -139,6 +389,10 @@ \section{Layouts} % _____________________________________________________________________________ +\section{Subviews} + +% _____________________________________________________________________________ + \section{Scatter Views} \end{document}