path: root/reproducible-paper.tex
diff options
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2019-06-03 02:21:03 +0200
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2019-06-03 02:21:03 +0200
commit05593d91a4148d2c852e263995a170007dbfb628 (patch)
treee37b349c205d09f7225007b1f940990992f86559 /reproducible-paper.tex
parent90f6b5b44a1cda18990c2f74726f5bf2d1c143c2 (diff)
Slides improved for IAC DNC and SUNDIAL meetings
The slides were significantly upgraded to help in making a better introduction and clearly demonstrating things for the users.
Diffstat (limited to 'reproducible-paper.tex')
1 files changed, 429 insertions, 63 deletions
diff --git a/reproducible-paper.tex b/reproducible-paper.tex
index 2abf09e..e96d60b 100644
--- a/reproducible-paper.tex
+++ b/reproducible-paper.tex
@@ -1,33 +1,77 @@
%% Beamer settings.
-\setbeamertemplate{footline}[frame number]
+%\setbeamertemplate{footline}[frame number]
%% Packages to import.
\usepackage{tcolorbox} %For a color-box.
\usepackage{textcomp} %For a copyright sign.
%% To simplify arXiv links
- (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}}
+ (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}}
%% Set the title
-\title{Reproducible scientific paper/project}
+\title{\LARGE \textbf{BIG} data, \textbf{BIG} responsibility:\\
+ \small Template/framework for reproducible scientific projects/papers}
%% Set the author
\author{Mohammad Akhlaghi\\\vspace{2mm}\footnotesize Instituto de
- Astrof\'isica de Canarias ({\scriptsize IAC}),\\Tenerife, Spain
+ Astrof\'isica de Canarias ({\scriptsize IAC}),\\Tenerife, Spain\\
+ \vspace{0.5cm}\includegraphics[width=1.8cm]{img/iac.png}
+ \includegraphics[width=3cm]{img/sundial.png}%\vspace{0.5cm}
%% Set the date and institutional logos.
+\date{\scriptsize \href{https://www.astro.rug.nl/~sundial/MidtermMeeting.html}{SUNDIAL Midterm meeting}, June 5th, 2019\\ Ghent, Belgium}
+%% For a wider writing width.
+ \begin{minipage}{\dimexpr\textwidth+#1\relax}
+ \raggedright#2
+ \end{minipage}%
+ }%
+%% TiKZ
+\tikzset{ bbox/.style={
+ rectangle,
+ minimum width=2.5cm,
+ rounded corners=2mm,
+ very thick,draw=black!50,
+ top color=white,
+ bottom color=black!20 } }
+\tikzset{ rbox/.style={
+ rectangle,
+ dotted,
+ minimum width=2.5cm,
+ rounded corners=2mm,
+ very thick,draw=red!50!black!50,
+ top color=white,
+ bottom color=red!50!black!20 } }
+\tikzset{ gbox/.style={
+ rectangle,
+ minimum width=2.5cm,
+ very thick,
+ draw=green!50!black!50,
+ top color=white,
+ bottom color=green!50!black!20 } }
@@ -45,9 +89,12 @@
\begin{frame}{Necessity of (exactly) reproducible research}
- \setlength\itemsep{0.3cm}
+ \setlength\itemsep{0.7cm}
\item To be considered \alert{scientific}, any result has to be
\item The tsunami of data, fast internet, and high processing
@@ -55,25 +102,125 @@
\item But these factors have also greatly increased the
\alert{complexity} of an analysis. Making it impossible to
- exactly describe all steps in a published paper.
- \item Most scientific papers thus ignore the ``details'' (as they
- interpret it).
+ exactly describe all steps in a traditional published paper.
+ \item Most scientific papers thus \alert{ignore some ``details''}
+ (as they interpret it).
\item But due to the complexity, even a small deviation from the
exact result, can be due to many different parts of the
analysis. Hence, it's \alert{critical to exactly reproduce} a
- \item The software(s) used, configuration file(s), the order of
- steps taken, along with the input data are necessary for
- reproducibility.
- \item \alert{A solution} is proposed here, which if adopted from
- the start, can greatly \alert{simplify a scientific research
- project} and \alert{allow full/exact reproducibility} once it
- is published.
+ \newcommand{\nodeopacity}{1}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\paperinit}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\sver}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\srep}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\dver}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\ddver}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\confopt}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\confenv}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\db}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\calib}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\corr}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\runord}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\runopt}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\humanerr}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\depupdate}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\coauth}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\varsinpaper}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\recordinfo}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\softcite}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\prevchange}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ \newcommand{\paperfinal}{}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
+ %% Don't show the happy scientist any more.
+ \let\paperfinal\undefined
+ \let\paperinit\undefined
+ \begin{frame}{Science is a tricky business}
+ \includegraphics[width=\linewidth]{img/nature-cartoon.jpg}
+ \vspace{-0.2cm}
+ {\tiny Image from nature.com
+ (``\href{https://www.nature.com/articles/d41586-017-07522-z}{Five
+ ways to fix statistics}'', Nov 2017)}
+ \vspace{0.2cm}
+ \begin{tcolorbox}
+ \small Data analysis [...] is a human behaviour. Researchers
+ who hunt hard enough will turn up a result that fits
+ statistical criteria, but their \alert{discovery} will
+ probably be a \alert{false positive}.
+ \hfill Five ways to fix statistics, Nature, 551, Nov 2017.
+ \end{tcolorbox}
+ \end{frame}
+ \begin{frame}{Necessity of (exactly) reproducible research}
+ \begin{tcolorbox}[title=Don't forget that:]
+ \centering Science is defined by its METHOD, \alert{not} its
+ result.
+ \end{tcolorbox}
+ \vspace{0.5cm}
+ \begin{itemize}
+ \setlength\itemsep{0.6cm}
+ \item The software(s) used, configuration file(s), the order of
+ steps taken, along with the input data are necessary for
+ reproducibility.
+ \item \alert{A solution} is proposed here, which if adopted from
+ the start, can greatly \alert{simplify a scientific research
+ project} and \alert{allow full/exact reproducibility} once it
+ is published.
+ \item In the next slides, we'll review the template from the
+ highest level (final research paper) to the lowest (setting up
+ the research environment).
+ \end{itemize}
+ \end{frame}
+ \renewcommand{\nodeopacity}{0.3}
+ \begin{frame}{General outline of a project} \include{tex/plot} \end{frame}
\begin{frame}{Values in final report/paper}
All necessary analysis/processing \alert{input} and \alert{output}
values are written into the final report as \LaTeX{} macros. Shown
@@ -174,43 +321,212 @@
- \begin{frame}{Reproducing the result and report/paper}
- The two \alert{simple} and \alert{familiar} commands below are
- enough to exactly reproduce the results at any time.
+ \begin{frame}{Predefined/exact software tools}
+ \small
+ \begin{columns}
+ \column{5.5cm}
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt, title=Reproducibility \&
+ software]
+ \footnotesize Reproducing the environment (specific
+ \alert{software versions}, \alert{build instructions} and
+ \alert{dependencies}) is also critically important for
+ reproducibility.
+ \end{tcolorbox}
+ \begin{itemize}
+ \setlength\itemsep{0.4cm}
+ \item \emph{Containers} or \emph{Virtual Machines} are a
+ \alert{binary black box}: just contain the environment, not
+ how to set it up, or its history. They are also an overhead.
+ \item This template \alert{installs fixed versions} of all
+ necessary research software and their dependencies, down to
+ the command-line shell, C compiler, POSIX tools and Python
+ interpreter. It just avoids very low-level OS elements like
+ the kernel or linker.
+ \item Installs similar environment on \alert{GNU/Linux}, or
+ \alert{macOS} systems.
+ \item Works very much like a package manager (e.g.,
+ \alert{\texttt{apt}} or \alert{\texttt{brew}}).
+ \end{itemize}
+ \column{5.5cm}
+ \includegraphics[width=\linewidth]{img/software.png}
+ \end{columns}
+ \end{frame}
+ \begin{frame}{Predefined/exact software tools}
+ \small
+ \begin{columns}
+ \column{5.5cm}
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt, title=Reproducibility \&
+ software]
+ \footnotesize Reproducing the environment (specific
+ \alert{software versions}, \alert{build instructions} and
+ \alert{dependencies}) is also critically important for
+ reproducibility.
+ \end{tcolorbox}
+ \begin{itemize}
+ \setlength\itemsep{0.4cm}
+ \item \emph{Containers} or \emph{Virtual Machines} are a
+ \alert{binary black box}: just contain the environment, not
+ how to set it up, or its history. They are also an overhead.
+ \item This template \alert{installs fixed versions} of all
+ necessary research software and their dependencies, down to
+ the command-line shell, C compiler, POSIX tools and Python
+ interpreter. It just avoids very low-level OS elements like
+ the kernel or linker.
+ \item Installs similar environment on \alert{GNU/Linux}, or
+ \alert{macOS} systems.
+ \item Works very much like a package manager (e.g.,
+ \alert{\texttt{apt}} or \alert{\texttt{brew}}).
+ \end{itemize}
+ \column{5.5cm}
+ \includegraphics[width=\linewidth]{img/software-highlighted.png}
+ \end{columns}
+ \end{frame}
+ \newcommand{\redbdir}{\textcolor{green!80!black}{/TEMPLATE/BUILD/DIRECTORY/software/installed/lib}}
+ \begin{frame}{Dependencies are cleanly managed}
- \item[] \texttt{\$ ./configure}
- \item[] \texttt{\$ make}
+ \item All the software are configured and built to use the
+ \alert{template's own builds}: independent of host system
+ (\textcolor{green!80!black}{in green}).
+ \item Template even builds a fixed GNU C Compiler (\alert{GCC}).
+ \item Only extremely low-level dependencies (for example C library
+ and Kernel) not built.
+ \begin{itemize}
+ \item GNU C library will also be added later (\alert{in red}).
+ \end{itemize}
- With \texttt{./configure}, you specify the local directories to
- use. All necessary \alert{software} are then \alert{downloaded}
- and installed there (independent of your OS or other projects).
- \vspace{0.3cm} With \texttt{make}, input \alert{data} from online
- archives (databases) are \alert{downloaded}, if not locally
- available, the processing is done, and the \LaTeX{} paper is built
- as a PDF (e.g., see
- \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}
- or
- \textcolor{blue}{\small\href{https://gitlab.com/makhlaghi/reproducible-paper-output/raw/master/paper.pdf}{template's
- output}}).
- \vspace{0.3cm} Enabling version control (e.g., with \alert{Git})
- encourages testing different ideas while not harming the
- initial/base result (thus encouraging \alert{creativity} and
- brainstorming during the project).
- \vspace{0.3cm} After publication, \alert{readers} can
- \alert{change} the input configurations and the numbers and
- figures of the reproduced paper will respectively change. This
- encourages creativity and brainstorming after the project as well
- as sharing of (the hardly gained) experiences with the whole
- community.
+ \vspace{0.5cm}
+ \tiny\texttt{
+ \$ ldd .local/bin/astnoisechisel\\
+ \hspace{0.5cm}libgit2.so.26 => \redbdir/libgit2.so.26 (0x00007febb5232000)\\
+ \hspace{0.5cm}libtiff.so.5 => \redbdir/libtiff.so.5 (0x00007febb51b8000)\\
+ \hspace{0.5cm}liblzma.so.5 => \redbdir/liblzma.so.5 (0x00007febb5190000)\\
+ \hspace{0.5cm}libjpeg.so.9 => \redbdir/libjpeg.so.9 (0x00007febb5153000)\\
+ \hspace{0.5cm}z.so.1 => \redbdir/libz.so.1 (0x00007febb5136000)\\
+ \hspace{0.5cm}wcs.so.6 => \redbdir/libwcs.so.6 (0x00007febb4fcc000)\\
+ \hspace{0.5cm}cfitsio.so.8 => \redbdir/libcfitsio.so.8 (0x00007febb4caf000)\\
+ \hspace{0.5cm}curl.so.4 => \redbdir/libcurl.so.4 (0x00007febb4c35000)\\
+ \hspace{0.5cm}ssl.so.1.1 => \redbdir/libssl.so.1.1 (0x00007febb4b9b000)\\
+ \hspace{0.5cm}crypto.so.1.1 => \redbdir/libcrypto.so.1.1 (0x00007febb48b5000)\\
+ \hspace{0.5cm}gsl.so.23 => \redbdir/libgsl.so.23 (0x00007febb4626000)\\
+ \hspace{0.5cm}gslcblas.so.0 => \redbdir/libgslcblas.so.0 (0x00007febb45e2000)\\
+ \hspace{0.5cm}gnuastro.so.8 => \redbdir/libgnuastro.so.8 (0x00007febb419e000)\\
+ \hspace{0.5cm}bz2.so.1.0 => \redbdir/libbz2.so.1.0 (0x00007febb3e20000)\\
+ \hspace{0.5cm}\alert{m.so.6} => /usr/lib/libm.so.6 (0x00007febb4025000)\\
+ \hspace{0.5cm}\alert{pthread.so.0} => /usr/lib/libpthread.so.0 (0x00007febb4004000)\\
+ \hspace{0.5cm}\alert{c.so.6} => /usr/lib/libc.so.6 (0x00007febb3e3f000)\\
+ \hspace{0.5cm}rt.so.1 => /usr/lib/librt.so.1 (0x00007febb3e35000)\\
+ \hspace{0.5cm}dl.so.2 => /usr/lib/libdl.so.2 (0x00007febb3e1b000)\\
+ \hspace{0.5cm}linux-vdso.so.1 (0x00007ffcf2497000)\\
+ \hspace{0.5cm}/lib64/ld-linux-x86-64.so.2 => /usr/lib64/ld-linux-x86-64.so.2 (0x00007febb53c6000)
+ }
+ \end{frame}
+ \begin{frame}{Advantages of this build system}
+ \begin{columns}
+ \column{7cm}
+ \begin{itemize}
+ \setlength\itemsep{1cm}
+ \item No need for \alert{root}/administrator \alert{permissions}
+ (on servers or super computers).
+ \item Whole system is built \alert{automatically} on any
+ Unix-like operating system (less than 2 hours).
+ \item Dependencies of different projects will \alert{not conflict}.
+ \item (Almost) all dependencies are \alert{exactly} documented and
+ can be reproduced.
+ \end{itemize}
+ \column{4cm}
+ \includegraphics[width=\linewidth]{img/unchained.jpg}\\
+ \tiny \url{https://natemowry2.wordpress.com}
+ \end{columns}
+ \end{frame}
+ \begin{frame}{Software acknowledgment and citation automatically generated in paper}
+ \includegraphics[width=\linewidth]{img/software-cite.png}
+ \end{frame}
+ \begin{frame}{Software acknowledgment and citation automatically generated in paper}
+ \includegraphics[width=\linewidth]{img/software-cite-highlighted.png}
+% \begin{frame}{Reproducing the result and report/paper}
+% The two \alert{simple} and \alert{familiar} commands below are
+% enough to exactly reproduce the results at any time.
+% \begin{itemize}
+% \item[] \texttt{\$ ./configure}
+% \item[] \texttt{\$ make}
+% \end{itemize}
+% With \texttt{./configure}, you specify the local directories to
+% use. All necessary \alert{software} are then \alert{downloaded}
+% and installed there (independent of your OS or other projects).
+% \vspace{0.3cm} With \texttt{make}, input \alert{data} from online
+% archives (databases) are \alert{downloaded}, if not locally
+% available, the processing is done, and the \LaTeX{} paper is built
+% as a PDF (e.g., see
+% \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}
+% or
+% \textcolor{blue}{\small\href{https://gitlab.com/makhlaghi/reproducible-paper-output/raw/master/paper.pdf}{template's
+% output}}).
+% \vspace{0.3cm} Enabling version control (e.g., with \alert{Git})
+% encourages testing different ideas while not harming the
+% initial/base result (thus encouraging \alert{creativity} and
+% brainstorming during the project).
+% \vspace{0.3cm} After publication, \alert{readers} can
+% \alert{change} the input configurations and the numbers and
+% figures of the reproduced paper will respectively change. This
+% encourages creativity and brainstorming after the project as well
+% as sharing of (the hardly gained) experiences with the whole
+% community.
+% \end{frame}
+ \renewcommand{\nodeopacity}{1}
+ \begin{frame}{Everything in plain text (machine and human readable)}
+ \include{tex/plot} \end{frame}
+ \newcommand{\paperinit}{}
+ \newcommand{\gitlogo}{}
+ \begin{frame}{Everything in plain text (machine and human readable)}
+ \include{tex/plot}
+ \end{frame}
\begin{frame}{Publication of the project}
A reproducible project using this template will have the following
@@ -237,25 +553,75 @@
Gigabytes) and software \\(for example
\textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}) and given a unique DOI.
- \begin{frame}
- The template is ready to use in the link below:
+ \begin{frame}{GOOD NEWS: RDA adoption grant to IAC for this template}
+ \begin{center}
+ \includegraphics[width=3cm]{img/rda.png}\hspace{1cm}
+ \includegraphics[width=1.8cm]{img/iac.png}
- \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper}}
+ \includegraphics[width=\linewidth]{img/h2020.png}
+ \end{center}
- \vspace{1.5cm} For a technical description of the template's
- implementation, as well as a checklist to customize it, and tips
- on good practices, please see this page:
+ \vspace{1cm} For this template, the \alert{IAC} is selected as
+ a \alert{Top European organization} funded to adopt RDA
+ Recommendations and Outputs.
- \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper/blob/pipeline/README-hacking.md}}
+ \vspace{1cm}
+ \scriptsize
+ \begin{itemize}
+ \item Research Data Alliance was launched by the \alert{European
+ Commission}, NSF, National Institute of Standards and
+ Technology, and the Australian Government's Department of
+ Innovation.
+ \item RDA Outputs are the technical and social infrastructure
+ solutions developed by RDA Working Groups or Interest
+ Groups that enable data sharing, exchange, and
+ interoperability.
+ \end{itemize}
+ \vspace{0.2cm}
+ \centering
+ \end{frame}
+ \begin{frame}{Summary:}
+ A fully working template/framework is introduced that will do the
+ following steps/instructions (all in simple plain text files).
+ \begin{itemize}
+ \item \alert{Automatically downloads} the necessary
+ \emph{software} and \emph{data}.
+ \item \alert{Builds} the software in a \alert{closed
+ environment}.
+ \item Runs the software on data to \alert{generate} the final
+ \alert{research results}.
+ \item A modification in one part of the analysis will only
+ result in re-doing that part, not the whole project.
+ \item Using \LaTeX{} macros, paper's figures, tables and numbers
+ will be \alert{automatically updated} after a change in
+ analysis. Allowing the scientist to focus on the scientific
+ interpretation.
+ \item The whole project is under \alert{version control} (Git)
+ to allow easy reversion to a previous state. This
+ \alert{encourages tests/experimentation} in the analysis.
+ \item The \alert{Git commit hash} of the project source, is
+ \alert{printed} in the published paper and \alert{saved on
+ output} data products. Ensuring the
+ integrity/reproducibility of the result.
+ \end{itemize}
- \vspace{1.5cm} For more on the necessity of reproducible research,
- please see:
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt]
+ For a technical description of the template's implementation, as
+ well as a checklist to customize it, and tips on good practices,
+ please see this page:
- \textcolor{blue}{\footnotesize\url{http://akhlaghi.org/reproducible-science.html}}
+ \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper/blob/master/README-hacking.md}}
+ \end{tcolorbox}