aboutsummaryrefslogtreecommitdiff
path: root/slides-intro.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides-intro.tex')
-rw-r--r--slides-intro.tex63
1 files changed, 34 insertions, 29 deletions
diff --git a/slides-intro.tex b/slides-intro.tex
index f4fcf97..3ffedf9 100644
--- a/slides-intro.tex
+++ b/slides-intro.tex
@@ -383,8 +383,7 @@ for computational reproducibility]
\vspace{3mm}\tiny From ``Attributing and Referencing (Research)
Software: Best Practices and Outlook from Inria'' (Alliez et
- al. 2019,
- \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+ al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}).
}
\end{frame}
\begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures)}
@@ -1048,10 +1047,29 @@ for computational reproducibility]
%% Make demo.
- \begin{frame}
- \LARGE
- \vspace{1cm}
- \hfill Let's see how the analysis is managed in a hypothetical project...
+ \begin{frame}{Let's look at the data lineage to replicate Figure 1C (green/tool) of Menke+2020 \\(DOI:\href{https://doi.org/10.1101/2020.01.15.908111}{10.1101/2020.01.15.908111})}
+ \begin{columns}
+ \column{0.55\linewidth}
+ \textcolor{blue}{ORIGINAL PLOT}
+
+ The green plot shows the fraction of papers mentioning software tools from 1997 to 2019.
+ \column{0.45\linewidth}
+ \includegraphics[width=\linewidth]{img/tools-per-year-orig.jpg}
+ \end{columns}
+
+ \rule{\textwidth}{1pt}
+
+ \begin{columns}
+ \column{0.4\linewidth}
+ \textcolor{green!70!black}{OUR enhanced REPLICATION}
+
+ The green line is the same as above, but over the full historical range of the data.
+
+ The red histogram is the number of papers studied in each year.
+ \column{0.6\linewidth}
+ \vspace{1cm}
+ \includegraphics[width=\linewidth]{img/tools-per-year.pdf}
+ \end{columns}
\end{frame}
\makedemoslide{img/data-lineage-1.pdf}
{Makefiles (\texttt{\*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
@@ -1066,37 +1084,24 @@ for computational reproducibility]
\makedemoslide{img/data-lineage-6.pdf}
{Basic project info comes from \texttt{initialize.tex}.}
\makedemoslide{img/data-lineage-7.pdf}
- {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+ {The paper includes some information about the plot.}
\makedemoslide{img/data-lineage-8.pdf}
- {... for example the number of rows in the second input (a catalog) of the project.}
+ {The final plotted data are calculated and stored in \texttt{tools-per-year.txt}.}
\makedemoslide{img/data-lineage-9.pdf}
- {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
+ {The plot's calculation is done on a formatted sub-set of the raw input data.}
\makedemoslide{img/data-lineage-10.pdf}
- {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
+ {The raw data that were downloaded are stored in XLSX format.}
\makedemoslide{img/data-lineage-11.pdf}
- {... for example the average of the numbers in \texttt{out-1b.dat}.}
+ {The download URL \emph{and} a \alert{checksum to validate} the raw inputs are stored in \texttt{INPUTS.conf}.}
\makedemoslide{img/data-lineage-12.pdf}
- {But \texttt{out-1b.dat} itself depends on other files and a paramter (for example a multiple of sigma).}
+ {We also need to report the URL in the paper...}
\makedemoslide{img/data-lineage-13.pdf}
- {\texttt{out-1a.dat} is built from a downloaded dataset.}
+ {Some general info about the full dataset may also be reported.}
\makedemoslide{img/data-lineage-14.pdf}
- {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
+ {We report the number of papers studied in a specific year; the desired year is stored in a \texttt{.conf} file.}
+
\makedemoslide{img/data-lineage-15.pdf}
- {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
- \makedemoslide{img/data-lineage-16.pdf}
- {... for example the number of selected rows in \texttt{out-2b.dat}.}
- \makedemoslide{img/data-lineage-17.pdf}
- {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
- \makedemoslide{img/data-lineage-18.pdf}
- {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
- \makedemoslide{img/data-lineage-19.pdf}
- {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
- \makedemoslide{img/data-lineage-20.pdf}
- {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
- \makedemoslide{img/data-lineage-21.pdf}
- {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
- \makedemoslide{img/data-lineage-22.pdf}
- {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis with needs one parameter.}
+ {It is very easy to expand the project and add new analysis steps (this solution is scalable)}