From 05593d91a4148d2c852e263995a170007dbfb628 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Mon, 3 Jun 2019 02:21:03 +0200 Subject: Slides improved for IAC DNC and SUNDIAL meetings The slides were significantly upgraded to help in making a better introduction and clearly demonstrating things for the users. --- reproducible-paper.tex | 492 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 429 insertions(+), 63 deletions(-) (limited to 'reproducible-paper.tex') diff --git a/reproducible-paper.tex b/reproducible-paper.tex index 2abf09e..e96d60b 100644 --- a/reproducible-paper.tex +++ b/reproducible-paper.tex @@ -1,33 +1,77 @@ \documentclass[9pt]{beamer} - %% Beamer settings. -\setbeamertemplate{footline}[frame number] - +%\setbeamertemplate{footline}[frame number] %% Packages to import. \usepackage{tcolorbox} %For a color-box. \usepackage{textcomp} %For a copyright sign. - %% To simplify arXiv links \newcommand{\arxivlink}[1]{{\footnotesize - (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}} - - + (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}} %% Set the title -\title{Reproducible scientific paper/project} - +\title{\LARGE \textbf{BIG} data, \textbf{BIG} responsibility:\\ + \small Template/framework for reproducible scientific projects/papers} %% Set the author \author{Mohammad Akhlaghi\\\vspace{2mm}\footnotesize Instituto de - Astrof\'isica de Canarias ({\scriptsize IAC}),\\Tenerife, Spain + Astrof\'isica de Canarias ({\scriptsize IAC}),\\Tenerife, Spain\\ + \vspace{0.5cm}\includegraphics[width=1.8cm]{img/iac.png} + \includegraphics[width=3cm]{img/sundial.png}%\vspace{0.5cm} } - %% Set the date and insitutional logos. -\date{\includegraphics[width=2cm]{img/iac.png}} +\date{\scriptsize \href{https://www.astro.rug.nl/~sundial/MidtermMeeting.html}{SUNDIAL Midterm meeting}, June 5th, 2019\\ Ghent, Belgium} + +%% For a wider writing width. 
+\newcommand\Wider[2][3em]{% +\makebox[\linewidth][c]{% + \begin{minipage}{\dimexpr\textwidth+#1\relax} + \raggedright#2 + \end{minipage}% + }% +} + +%% TiKZ +\usepackage{tikz} +\usetikzlibrary{graphs} +\usetikzlibrary{positioning} +\tikzset{ bbox/.style={ + rectangle, + minimum width=2.5cm, + rounded corners=2mm, + very thick,draw=black!50, + top color=white, + bottom color=black!20 } } + +\tikzset{ rbox/.style={ + rectangle, + dotted, + minimum width=2.5cm, + rounded corners=2mm, + very thick,draw=red!50!black!50, + top color=white, + bottom color=red!50!black!20 } } + +\tikzset{ gbox/.style={ + rectangle, + minimum width=2.5cm, + very thick, + draw=green!50!black!50, + top color=white, + bottom color=green!50!black!20 } } + + + + + + + + + + @@ -45,9 +89,12 @@ \end{frame} + + + \begin{frame}{Necessity of (exactly) reproducible research} \begin{itemize} - \setlength\itemsep{0.3cm} + \setlength\itemsep{0.7cm} \item To be considered \alert{scientific}, any result has to be reproducible. \item The tsunami of data, fast internet, and high processing @@ -55,25 +102,125 @@ result}. \item But these factors have also greatly increased the \alert{complexity} of an analysis. Making it impossible to - exactly describe all steps in a published paper. - \item Most scientific papers thus ignore the ``details'' (as they - interpret it). + exactly describe all steps in a traditional published paper. + \item Most scientific papers thus \alert{ignore some ``details''} + (as they interpret it). \item But due to the complexity, even a small deviation from the exact result, can be due to many different parts of the analysis. Hence, its \alert{critical to exactly reproduce} a result. - \item The software(s) used, configuration file(s), the order of - steps taken, along with the input data are necessary for - reproducibility. 
- \item \alert{A solution} is proposed here, which if adopted from - the start, can greatly \alert{simplify a scientific research - project} and \alert{allow full/exact reproducibility} once it - is published. \end{itemize} \end{frame} + + \newcommand{\nodeopacity}{1} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\paperinit}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\sver}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\srep}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\dver}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\ddver}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\confopt}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\confenv}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\db}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\calib}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\corr}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\runord}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\runopt}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\humanerr}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\depupdate}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\coauth}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\varsinpaper}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + 
\newcommand{\recordinfo}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\softcite}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\prevchange}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + \newcommand{\paperfinal}{} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + + %% Don't show the happy scientist any more. + \let\paperfinal\undefined + \let\paperinit\undefined + + + + + + \begin{frame}{Science is a tricky business} + \includegraphics[width=\linewidth]{img/nature-cartoon.jpg} + + \vspace{-0.2cm} + + {\tiny Image from nature.com + (``\href{https://www.nature.com/articles/d41586-017-07522-z}{Five + ways to fix statistics}'', Nov 2017)} + + \vspace{0.2cm} + \begin{tcolorbox} + \small Data analysis [...] is a human behaviour. Researchers + who hunt hard enough will turn up a result that fits + statistical criteria, but their \alert{discovery} will + probably be a \alert{false positive}. + + \hfill Five ways to fix statistics, Nature, 551, Nov 2017. + \end{tcolorbox} + \end{frame} + + + + + + \begin{frame}{Necessity of (exactly) reproducible research} + \begin{tcolorbox}[title=Don't forget that:] + \centering Science is defined by its METHOD, \alert{not} its + result. + \end{tcolorbox} + + \vspace{0.5cm} + \begin{itemize} + \setlength\itemsep{0.6cm} + \item The software(s) used, configuration file(s), the order of + steps taken, along with the input data are necessary for + reproducibility. + \item \alert{A solution} is proposed here, which if adopted from + the start, can greatly \alert{simplify a scientific research + project} and \alert{allow full/exact reproducibility} once it + is published. + \item In the next slides, we'll review the template from the + highest level (final research paper) to the lowest (setting up + the research environment). 
+ \end{itemize} + \end{frame} + + + + \renewcommand{\nodeopacity}{0.3} + \begin{frame}{General outline of a project} \include{tex/plot} \end{frame} + + + + + \begin{frame}{Values in final report/paper} All necessary analysis/processing \alert{input} and \alert{output} values are written into the final report as \LaTeX{} macros. Shown @@ -174,43 +321,212 @@ \end{frame} - \begin{frame}{Reproducing the result and report/paper} - The two \alert{simple} and \alert{familiar} commands below are - enough to exactly reproduce the results at any time. + + + + \begin{frame}{Predefined/exact software tools} + \small + \begin{columns} + \column{5.5cm} + \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt, + top=1pt, bottom=1pt, title=Reproducibility \& + software] + \footnotesize Reproducing the environment (specific + \alert{software versions}, \alert{build instructions} and + \alert{dependencies}) is also critically important for + reproducibility. + \end{tcolorbox} + + \begin{itemize} + \setlength\itemsep{0.4cm} + \item \emph{Containers} or \emph{Virtual Machines} are a + \alert{binary black box}: just contain the environment, not + how to set it up, or its history. They are also an overhead. + + \item This template \alert{installs fixed versions} of all + necessary research software and their dependencies, down to + the command-line shell, C compiler, POSIX tools and Python + interpreter. It just avoids very low-level OS elements like + the kernel or linker. + + \item Installs similar environment on \alert{GNU/Linux}, or + \alert{macOS} systems. + + \item Works very much like a package manager (e.g., + \alert{\texttt{apt}} or \alert{\texttt{brew}}). 
+ \end{itemize} + + \column{5.5cm} + \includegraphics[width=\linewidth]{img/software.png} + \end{columns} + \end{frame} + + + + + + \begin{frame}{Predefined/exact software tools} + \small + \begin{columns} + \column{5.5cm} + \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt, + top=1pt, bottom=1pt, title=Reproducibility \& + software] + \footnotesize Reproducing the environment (specific + \alert{software versions}, \alert{build instructions} and + \alert{dependencies}) is also critically important for + reproducibility. + \end{tcolorbox} + + \begin{itemize} + \setlength\itemsep{0.4cm} + \item \emph{Containers} or \emph{Virtual Machines} are a + \alert{binary black box}: just contain the environment, not + how to set it up, or its history. They are also an overhead. + + \item This template \alert{installs fixed versions} of all + necessary research software and their dependencies, down to + the command-line shell, C compiler, POSIX tools and Python + interpreter. It just avoids very low-level OS elements like + the kernel or linker. + + \item Installs similar environment on \alert{GNU/Linux}, or + \alert{macOS} systems. + + \item Works very much like a package manager (e.g., + \alert{\texttt{apt}} or \alert{\texttt{brew}}). + \end{itemize} + + \column{5.5cm} + \includegraphics[width=\linewidth]{img/software-highlighted.png} + \end{columns} + \end{frame} + + + \newcommand{\redbdir}{\textcolor{green!80!black}{/TEMPLATE/BUILD/DIRECTORY/software/installed/lib}} + \begin{frame}{Dependencies are cleanly managed} \begin{itemize} - \item[] \texttt{\$ ./configure} - \item[] \texttt{\$ make} + \item All the software are configured and built to use the + \alert{template's own builds}: independent of host system + (\textcolor{green!80!black}{in green}). + \item Template even builds a fixed GNU C Compiler (\alert{GCC}). + \item Only extremely low-level dependencies (for example C library + and Kernel) are not built. 
+ \begin{itemize} + \item GNU C library will also be added later (\alert{in red}). + \end{itemize} \end{itemize} - With \texttt{./configure}, you specify the local directories to - use. All necessary \alert{software} are then \alert{downloaded} - and installed there (independent of your OS or other projects). - - \vspace{0.3cm} With \texttt{make}, input \alert{data} from online - archives (databases) are \alert{downloaded}, if not locally - available, the processing is done, and the \LaTeX{} paper is built - as a PDF (e.g., see - \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}} - or - \textcolor{blue}{\small\href{https://gitlab.com/makhlaghi/reproducible-paper-output/raw/master/paper.pdf}{template's - output}}). - - \vspace{0.3cm} Enabling version control (e.g., with \alert{Git}) - encourages testing different ideas while not harming the - initial/base result (thus encouraging \alert{creativity} and - brainstorming during the project). - - \vspace{0.3cm} After publication, \alert{readers} can - \alert{change} the input configurations and the numbers and - figures of the reproduced paper will respectively change. This - encourages creativity and brainstorming after the project as well - as sharing of (the hardly gained) experiences with the whole - community. 
+ \vspace{0.5cm} + \tiny\texttt{ + \$ ldd .local/bin/astnoisechisel\\ + \hspace{0.5cm}libgit2.so.26 => \redbdir/libgit2.so.26 (0x00007febb5232000)\\ + \hspace{0.5cm}libtiff.so.5 => \redbdir/libtiff.so.5 (0x00007febb51b8000)\\ + \hspace{0.5cm}liblzma.so.5 => \redbdir/liblzma.so.5 (0x00007febb5190000)\\ + \hspace{0.5cm}libjpeg.so.9 => \redbdir/libjpeg.so.9 (0x00007febb5153000)\\ + \hspace{0.5cm}z.so.1 => \redbdir/libz.so.1 (0x00007febb5136000)\\ + \hspace{0.5cm}wcs.so.6 => \redbdir/libwcs.so.6 (0x00007febb4fcc000)\\ + \hspace{0.5cm}cfitsio.so.8 => \redbdir/libcfitsio.so.8 (0x00007febb4caf000)\\ + \hspace{0.5cm}curl.so.4 => \redbdir/libcurl.so.4 (0x00007febb4c35000)\\ + \hspace{0.5cm}ssl.so.1.1 => \redbdir/libssl.so.1.1 (0x00007febb4b9b000)\\ + \hspace{0.5cm}crypto.so.1.1 => \redbdir/libcrypto.so.1.1 (0x00007febb48b5000)\\ + \hspace{0.5cm}gsl.so.23 => \redbdir/libgsl.so.23 (0x00007febb4626000)\\ + \hspace{0.5cm}gslcblas.so.0 => \redbdir/libgslcblas.so.0 (0x00007febb45e2000)\\ + \hspace{0.5cm}gnuastro.so.8 => \redbdir/libgnuastro.so.8 (0x00007febb419e000)\\ + \hspace{0.5cm}bz2.so.1.0 => \redbdir/libbz2.so.1.0 (0x00007febb3e20000)\\ + \hspace{0.5cm}\alert{m.so.6} => /usr/lib/libm.so.6 (0x00007febb4025000)\\ + \hspace{0.5cm}\alert{pthread.so.0} => /usr/lib/libpthread.so.0 (0x00007febb4004000)\\ + \hspace{0.5cm}\alert{c.so.6} => /usr/lib/libc.so.6 (0x00007febb3e3f000)\\ + \hspace{0.5cm}rt.so.1 => /usr/lib/librt.so.1 (0x00007febb3e35000)\\ + \hspace{0.5cm}dl.so.2 => /usr/lib/libdl.so.2 (0x00007febb3e1b000)\\ + \hspace{0.5cm}linux-vdso.so.1 (0x00007ffcf2497000)\\ + \hspace{0.5cm}/lib64/ld-linux-x86-64.so.2 => /usr/lib64/ld-linux-x86-64.so.2 (0x00007febb53c6000) + } + \end{frame} + + + + + \begin{frame}{Advantages of this build system} + \begin{columns} + \column{7cm} + \begin{itemize} + \setlength\itemsep{1cm} + \item No need for \alert{root}/administrator \alert{permissions} + (on servers or super computers). 
+ \item Whole system is built \alert{automatically} on any + Unix-like operating system (less than 2 hours). + \item Dependencies of different projects will \alert{not conflict}. + \item (Almost) all dependencies are \alert{exactly} documented and + can be reproduced. + \end{itemize} + \column{4cm} + \includegraphics[width=\linewidth]{img/unchained.jpg}\\ + \tiny \url{https://natemowry2.wordpress.com} + \end{columns} + \end{frame} + + + + \begin{frame}{Software acknowledgment and citation automatically generated in paper} + \includegraphics[width=\linewidth]{img/software-cite.png} + \end{frame} + \begin{frame}{Software acknowledgment and citation automatically generated in paper} + \includegraphics[width=\linewidth]{img/software-cite-highlighted.png} \end{frame} + + +% \begin{frame}{Reproducing the result and report/paper} +% The two \alert{simple} and \alert{familiar} commands below are +% enough to exactly reproduce the results at any time. +% +% \begin{itemize} +% \item[] \texttt{\$ ./configure} +% \item[] \texttt{\$ make} +% \end{itemize} +% +% With \texttt{./configure}, you specify the local directories to +% use. All necessary \alert{software} are then \alert{downloaded} +% and installed there (independent of your OS or other projects). +% +% \vspace{0.3cm} With \texttt{make}, input \alert{data} from online +% archives (databases) are \alert{downloaded}, if not locally +% available, the processing is done, and the \LaTeX{} paper is built +% as a PDF (e.g., see +% \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}} +% or +% \textcolor{blue}{\small\href{https://gitlab.com/makhlaghi/reproducible-paper-output/raw/master/paper.pdf}{template's +% output}}). +% +% \vspace{0.3cm} Enabling version control (e.g., with \alert{Git}) +% encourages testing different ideas while not harming the +% initial/base result (thus encouraging \alert{creativity} and +% brainstorming during the project). 
+% +% \vspace{0.3cm} After publication, \alert{readers} can +% \alert{change} the input configurations and the numbers and +% figures of the reproduced paper will respectively change. This +% encourages creativity and brainstorming after the project as well +% as sharing of (the hardly gained) experiences with the whole +% community. +% \end{frame} + + + + \renewcommand{\nodeopacity}{1} + \begin{frame}{Everything in plain text (machine and human readable)} + \include{tex/plot} \end{frame} + \newcommand{\paperinit}{} + \newcommand{\gitlogo}{} + \begin{frame}{Everything in plain text (machine and human readable)} + \include{tex/plot} + \end{frame} + + \begin{frame}{Publication of the project} A reproducible project using this template will have the following @@ -237,25 +553,75 @@ Gigabytes) and software \\(for example \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}) and given a unique DOI. \end{itemize} - \end{frame} - \begin{frame} - The template is ready to use in the link below: + \begin{frame}{GOOD NEWS: RDA adoption grant to IAC for this template} + \begin{center} + \includegraphics[width=3cm]{img/rda.png}\hspace{1cm} + \includegraphics[width=1.8cm]{img/iac.png} - \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper}} + \includegraphics[width=\linewidth]{img/h2020.png} + \end{center} - \vspace{1.5cm} For a technical description of the template's - implementation, as well as a checklist to customize it, and tips - on good practices, please see this page: + \vspace{1cm} For this template, the \alert{IAC} is selected as + a \alert{Top European organization} funded to adopt RDA + Recommendations and Outputs. 
- \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper/blob/pipeline/README-hacking.md}} + \vspace{1cm} + \scriptsize + \begin{itemize} + \item Research Data Alliance was launched by the \alert{European + Commission}, NSF, National Institute of Standards and + Technology, and the Australian Government's Department of + Innovation. + \item RDA Outputs are the technical and social infrastructure + solutions developed by RDA Working Groups or Interest + Groups that enable data sharing, exchange, and + interoperability. + \end{itemize} + + \vspace{0.2cm} + \centering + + \end{frame} + + + + \begin{frame}{Summary:} + + A fully working template/framework is introduced that will do the + following steps/instructions (all in simple plain text files). + \begin{itemize} + \item \alert{Automatically downloads} the necessary + \emph{software} and \emph{data}. + \item \alert{Builds} the software in a \alert{closed + environment}. + \item Runs the software on data to \alert{generate} the final + \alert{research results}. + \item A modification in one part of the analysis will only + result in re-doing that part, not the whole project. + \item Using \LaTeX{} macros, paper's figures, tables and numbers + will be \alert{automatically updated} after a change in + analysis. Allowing the scientist to focus on the scientific + interpretation. + \item The whole project is under \alert{version control} (Git) + to allow easy reversion to a previous state. This + \alert{encourages tests/experimentation} in the analysis. + \item The \alert{Git commit hash} of the project source, is + \alert{printed} in the published paper and \alert{saved on + output} data products. Ensuring the + integrity/reproducibility of the result. 
+ \end{itemize} - \vspace{1.5cm} For more on the necessity of reproducible research, - please see: + \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt, + top=1pt, bottom=1pt] + For a technical description of the template's implementation, as + well as a checklist to customize it, and tips on good practices, + please see this page: - \textcolor{blue}{\footnotesize\url{http://akhlaghi.org/reproducible-science.html}} + \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper/blob/master/README-hacking.md}} + \end{tcolorbox} \end{frame} \end{document} -- cgit v1.2.1