@@ -95,6 +95,21 @@ \section{Introduction}
9595
9696% _____________________________________________________________________________
9797
98+ \begin {frame }
99+ \frametitle {This course is open-source}
100+
101+ \begin {center }
102+
103+ \includegraphics [width=0.3\textwidth ]{../../images/GitHub-logo.png}
104+
105+ \href {https://github.com/CExA-project/cexa-kokkos-tutorials}{https://github.com/CExA-project/cexa-kokkos-tutorials}
106+
107+ \end {center }
108+
109+ \end {frame }
110+
111+ % _____________________________________________________________________________
112+
98113\begin {frame }
99114\frametitle {Prerequisite}
100115
@@ -117,7 +132,7 @@ \section{Introduction}
117132 \begin {itemize }
118133 \item Course + practical work: full day
119134 \item Course + corrected exercise: half day
120- \item Fest version: 3 hours
135+ \item Short version: 3 hours
121136 \end {itemize }
122137
123138 \end {frame }
@@ -136,12 +151,14 @@ \section{Introduction}
136151% _____________________________________________________________________________
137152
138153\begin {frame }
139- \frametitle {Current super-computers use a wide variety of hardware technologies}
154+ \frametitle {Current supercomputers use a wide variety of hardware technologies}
140155
156+ \begin {figure }
141157\begin {center }
142158 \includegraphics [width=1\textwidth ]{../../images/top10_super_computers.png}
159+ \caption {Top 10 supercomputers in the world in June 2024}
143160\end {center }
144-
161+ \end {figure }
145162
146163\begin {itemize }
147164\item Heterogeneous nodes: mix of CPU and GPU accelerators
@@ -153,22 +170,28 @@ \section{Introduction}
153170% _____________________________________________________________________________
154171
155172\begin {frame }
156- \frametitle {EuroHPC super-computers }
173+ \frametitle {EuroHPC supercomputers }
157174
175+ \begin {figure }
158176\begin {center }
159177 \includegraphics [width=1\textwidth ]{../../images/euroHPC.png}
178+ \caption {EuroHPC supercomputers}
160179\end {center }
180+ \end {figure }
161181
162182\end {frame }
163183
164184% _____________________________________________________________________________
165185
166186\begin {frame }
167- \frametitle {National academic super-computers }
187+ \frametitle {National academic supercomputers }
168188
169- \begin {center }
170- \includegraphics [width=1\textwidth ]{../../images/french_super_computers.png}
171- \end {center }
189+ \begin {figure }
190+ \begin {center }
191+ \includegraphics [width=0.9\textwidth ]{../../images/french_super_computers.png}
192+ \caption {Academic and CEA supercomputers}
193+ \end {center }
194+ \end {figure }
172195
173196\end {frame }
174197
@@ -189,10 +212,10 @@ \section{Introduction}
189212% _____________________________________________________________________________
190213
191214\begin {frame }
192- \frametitle {Basic definition of a super-computer }
215+ \frametitle {Basic definition of a supercomputer }
193216
194217 \begin {itemize }
195- \item Super-computer is a distributed memory system composed of many compute nodes packed into racks and linked by a high-speed network
218+ \item A supercomputer is a distributed memory system composed of many compute nodes packed into racks and linked by a high-speed network
196219 \item Compute nodes are composed of one or more CPUs and one or more GPUs
197220 \end {itemize }
198221
@@ -209,7 +232,7 @@ \section{Introduction}
209232 Architecture description
210233
211234 \begin {itemize }
212- \item CPU are designed for general purpose, from sequential task to parallel computing
235+ \item CPUs are designed for general-purpose use, from sequential tasks to parallel computing
213236 \item tens to hundreds of cores in the biggest processors
214237 \item SIMD (Single Instruction Multiple Data) units for accelerating arithmetic operations
215238 \end {itemize }
@@ -224,15 +247,17 @@ \section{Introduction}
224247% _____________________________________________________________________________
225248
226249\begin {frame }
227- \frametitle {Zoom on the GPU architecture}
250+ \frametitle {Zoom on the GPGPU architecture}
228251
252+ \small
229253\begin {itemize }
230- \item Large SIMT vector unit (Single Instruction Multiple Threads) per computing unit
254+ \item GPGPUs (General-Purpose Graphics Processing Units) are designed to achieve massive parallelism of simple kernels
231255 \item hundreds of computing units, thousands of threads
232- \item Each vendor has its own vocabulary
256+ \item Large SIMT vector unit (Single Instruction Multiple Threads) per computing unit
233257\end {itemize }
234258
235- \hspace {0.5cm}
259+ \normalsize
260+ \hspace {0.2cm}
236261
237262\begin {center }
238263 \includegraphics [width=0.7\textwidth ]{../../images/gpu_architecture.png}
@@ -265,12 +290,16 @@ \section{Introduction}
265290% _____________________________________________________________________________
266291
267292\begin {frame }
268- \frametitle {What's the motivation putting GPUs in super-computers ?}
293+ \frametitle {What's the motivation for putting GPUs in supercomputers?}
269294
270295\begin {center }
271296 \includegraphics [width=0.9\textwidth ]{../../images/flop_watt_ratio_history_fp64.png}
272297\end {center }
273298
299+ \begin {itemize }
300+ \item Question: how to build the fastest machine with the lowest power consumption and the lowest cost?
301+ \end {itemize }
302+
274303\end {frame }
275304
276305% _____________________________________________________________________________
@@ -305,7 +334,7 @@ \section{Introduction}
305334In the field of numerical simulation, engineers also want:
306335
307336\begin {itemize }
308- \item \textbf {Maturity: } production-ready not research product, number of users
337+ \item \textbf {Maturity: } production-ready, not a research product
309338 \item \textbf {Community: } contributors, support, documentation, examples
310339 \item \textbf {Longevity: } long-term maintenance, bug fixes, updates, sponsorship
311340 \item \textbf {Interoperability: } possibility to easily couple with other libraries and tools (IO, Linear Algebra, Machine learning, etc.)
@@ -318,13 +347,18 @@ \section{Introduction}
318347\frametitle {Do you need performance portability?}
319348Probably yes if you:
320349\begin {itemize }
321- \item Want your code runs on different hardware technologies (from CPU to GPU)
350+ \item Want your code to run on different hardware technologies (from CPU to GPU)
322351 \item Have performance requirements
323352 \item Cannot afford to maintain and optimize different versions of your code for each possible hardware on the market
324- \item You want to focus on the algorithm and not on the development of the code
353+ \item Want to focus on algorithms rather than code development
325354 \item Want your code to be easily maintainable and readable by others, especially non-experts
326355\end {itemize }
327356
357+ Maybe not if you:
358+ \begin {itemize }
359+ \item Need to tune your algorithms to run as fast as possible on a given hardware
360+ \end {itemize }
361+
328362\end {frame }
329363
330364% _____________________________________________________________________________
@@ -342,7 +376,7 @@ \section{Introduction}
342376 \end {itemize }
343377
344378 \begin {center }
345- \includegraphics [width=0.9 \textwidth ]{../../images/kokkos_model.png}
379+ \includegraphics [width=1 \textwidth ]{../../images/kokkos_model.png}
346380 \end {center }
347381
348382\end {frame }
@@ -353,7 +387,7 @@ \section{Introduction}
353387 \frametitle {Kokkos parallelism scope}
354388
355389 \begin {itemize }
356- \item Kokkos Core only handle node-level parallelism
390+ \item Kokkos only handles node-level parallelism
357391 \item You still need a model for distributed memory parallelism (MPI for instance)
358392 \end {itemize }
359393
@@ -435,7 +469,7 @@ \section{Basic concepts of Kokkos}
435469 \begin {itemize }
436470 \item Requires CMake and a C++ compiler
437471 \item Compile Kokkos as an external library or as part of your project (inline build) using the source
438- \item You can as use Spack to get a precompiled version of Kokkos
472+ \item You can use Spack to install Kokkos
439473 \item You can get the source code from the \href {https://github.com/kokkos/kokkos}{Kokkos GitHub repository}
440474 \end {itemize }
441475
@@ -530,7 +564,7 @@ \section{Basic concepts of Kokkos}
530564 \item Architecture CMake options \texttt {-DKokkos\_ ARCH\_ <ARCH\_ NAME>=ON } can be specified for best performance
531565 \item Replace \texttt {<ARCH\_ NAME> } with the architecture name of the target host and device
532566 \item For example, for an NVIDIA A100 GPU, the architecture name is \texttt {AMPERE80 }
533- \item All Cmake option are available \href {https://kokkos.org/kokkos-core-wiki/keywords.html#}{on the dedicated doc page}
567+ \item All CMake options are available \href {https://kokkos.org/kokkos-core-wiki/keywords.html#}{on the dedicated doc page}
534568\end {itemize }
535569
536570\end {frame }
0 commit comments