1 files changed, 34 insertions, 29 deletions
diff --git a/slides-intro.tex b/slides-intro.tex
index f4fcf97..3ffedf9 100644
--- a/slides-intro.tex
+++ b/slides-intro.tex
@@ -383,8 +383,7 @@ for computational reproducibility]
 
       \vspace{3mm}\tiny From ``Attributing and Referencing (Research)
       Software: Best Practices and Outlook from Inria'' (Alliez et
-      al. 2019,
-      \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+      al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}).
     }
   \end{frame}
   \begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures)}
@@ -1048,10 +1047,29 @@ for computational reproducibility]
 
 
   %% Make demo.
-  \begin{frame}
-    \LARGE
-    \vspace{1cm}
-    \hfill Let's see how the analysis is managed in a hypothetical project...
+  \begin{frame}{Let's look at the data lineage to replicate Figure 1C (green/tool) of Menke+2020 \\(DOI:\href{https://doi.org/10.1101/2020.01.15.908111}{10.1101/2020.01.15.908111})}
+    \begin{columns}
+      \column{0.55\linewidth}
+      \textcolor{blue}{ORIGINAL PLOT}
+
+      The green plot shows the fraction of papers mentioning software tools from 1997 to 2019.
+      \column{0.45\linewidth}
+      \includegraphics[width=\linewidth]{img/tools-per-year-orig.jpg}
+    \end{columns}
+
+    \rule{\textwidth}{1pt}
+
+    \begin{columns}
+      \column{0.4\linewidth}
+      \textcolor{green!70!black}{OUR enhanced REPLICATION}
+
+      The green line is the same as above, but over the full historical range of their data.
+
+      The red histogram is the number of papers studied in each year.
+      \column{0.6\linewidth}
+      \vspace{1cm}
+      \includegraphics[width=\linewidth]{img/tools-per-year.pdf}
+    \end{columns}
   \end{frame}
   \makedemoslide{img/data-lineage-1.pdf}
                 {Makefiles (\texttt{\*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
@@ -1066,37 +1084,24 @@ for computational reproducibility]
   \makedemoslide{img/data-lineage-6.pdf}
                 {Basic project info comes from \texttt{initialize.tex}.}
   \makedemoslide{img/data-lineage-7.pdf}
-                {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+                {The paper includes some information about the plot.}
   \makedemoslide{img/data-lineage-8.pdf}
-                {... for example the number of rows in the second input (a catalog) of the project.}
+                {The final plotted data are calculated and stored in \texttt{tools-per-year.txt}.}
   \makedemoslide{img/data-lineage-9.pdf}
-                {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
+                {The plot's calculation is done on a formatted sub-set of the raw input data.}
   \makedemoslide{img/data-lineage-10.pdf}
-                {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
+                {The raw data that were downloaded are stored in XLSX format.}
   \makedemoslide{img/data-lineage-11.pdf}
-                {... for example the average of the numbers in \texttt{out-1b.dat}.}
+                {The download URL \emph{and} a \alert{checksum to validate} the raw inputs are stored in \texttt{INPUTS.conf}.}
   \makedemoslide{img/data-lineage-12.pdf}
-                {But \texttt{out-1b.dat} itself depends on other files and a paramter (for example a multiple of sigma).}
+                {We also need to report the URL in the paper...}
   \makedemoslide{img/data-lineage-13.pdf}
-                {\texttt{out-1a.dat} is built from a downloaded dataset.}
+                {Some general info about the full dataset may also be reported.}
   \makedemoslide{img/data-lineage-14.pdf}
-                {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
+                {We report the number of papers studied in a particular year; the desired year is stored in a \texttt{.conf} file.}
+
   \makedemoslide{img/data-lineage-15.pdf}
-                {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
-  \makedemoslide{img/data-lineage-16.pdf}
-                {... for example the number of selected rows in \texttt{out-2b.dat}.}
-  \makedemoslide{img/data-lineage-17.pdf}
-                {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
-  \makedemoslide{img/data-lineage-18.pdf}
-                {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
-  \makedemoslide{img/data-lineage-19.pdf}
-                {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
-  \makedemoslide{img/data-lineage-20.pdf}
-                {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
-  \makedemoslide{img/data-lineage-21.pdf}
-                {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
-  \makedemoslide{img/data-lineage-22.pdf}
-                {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis with needs one parameter.}
+                {It is very easy to expand the project and add new analysis steps (this solution is scalable).}