aboutsummaryrefslogtreecommitdiff
path: root/slides-intro.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides-intro.tex')
-rw-r--r--slides-intro.tex1515
1 files changed, 1515 insertions, 0 deletions
diff --git a/slides-intro.tex b/slides-intro.tex
new file mode 100644
index 0000000..33fa428
--- /dev/null
+++ b/slides-intro.tex
@@ -0,0 +1,1515 @@
+% LaTeX source of slides on reproducible paper.
+%
+% Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+%
+% This LaTeX source is free software: you can redistribute it and/or
+% modify it under the terms of the GNU General Public License as
+% published by the Free Software Foundation, either version 3 of the
+% License, or (at your option) any later version.
+%
+% This LaTeX source is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+% General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this LaTeX source. If not, see <https://www.gnu.org/licenses/>.
+
+% Basic LaTeX settings.
+\documentclass[9pt,usenames,dvipsnames,aspectratio=169]{beamer}
+
+% Read the Git commit information and the shared preamble (\input, because \include is only meant for document-body parts: it issues \clearpage and opens a per-file .aux).
+\input{git-commit}
+\input{tex/preamble}
+
+%% Beamer settings.
+%\setbeamertemplate{footline}[frame number]
+
+%% Packages to import.
+\usepackage{tcolorbox} %For a color-box.
+\usepackage{textcomp} %For a copyright sign.
+
+%% To simplify arXiv links: \arxivlink{<id>} prints "(arXiv:<id>)" at \footnotesize, with the blue link text pointing to https://arxiv.org/abs/<id>.
+\newcommand{\arxivlink}[1]{{\footnotesize
+ (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}}
+
+%% Set the title
+\title{\huge\textbf{BIG} Data, \textbf{BIG} responsibility
+ \\ {\normalsize Introducing \emph{Maneage}: customizable framework for \emph{man}aging data lin\emph{eage}}}
+
+%% Set the author
+\author{\vspace{1cm}\\
+ \href{https://akhlaghi.org}{Mohammad Akhlaghi}\\\vspace{0.5mm}
+ \footnotesize
+ Instituto de Astrof\'isica de Canarias ({\scriptsize IAC}), Tenerife, Spain
+}
+
+%% Set the date and institutional logos.
+\date{\footnotesize\vspace{0cm}\\
+ \textcolor{white}{PLACE HOLDER}\\\textcolor{white}{MONTH DAY, YEAR} \\
+ \tiny\vspace{3mm}
+ Most recent slides available in link below (this PDF is built from \href{https://gitlab.com/maneage/slides-introduction}{Git commit} \gitcommit):\\
+ \footnotesize\textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}\\
+ \vspace{2mm}\hspace{-0.25cm}
+ \raisebox{+0.4\height}{\includegraphics[width=2.5cm]{img/ministerio-ciencia.png}}
+ \raisebox{+0.3\height}{\includegraphics[width=1.3cm]{img/sundial.png}}
+ \includegraphics[width=1.2cm]{img/iac.png}
+ \includegraphics[width=1cm]{img/eu-sundial.png}
+ \raisebox{0.13\height}{\includegraphics[width=1cm]{img/eu-regional.png}}
+ \raisebox{0.05\height}{\includegraphics[width=1cm]{img/eu-rdaeu4.png}}
+ \raisebox{+0.1\height}{\includegraphics[width=1.4cm]{img/rda-europe.png}}
+ \raisebox{+1.3\height}{\includegraphics[width=1.4cm]{img/ull.png}}
+ { }\raisebox{+0.5\height}{\includegraphics[width=2cm]{img/gobierno-canarias.png}}\\
+ \vspace{1cm}
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+\begin{document}
+
+ \begin{frame}
+ \titlepage
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Let's start with this nice image of the Whirlpool galaxy (M51): \small{\url{https://i.redd.it/jfqgpqg0hfk11.jpg}}}
+ \begin{center}
+ \includegraphics[width=0.8\linewidth]{img/m51-amateur.jpg}
+ \end{center}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Now, let's assume you want to study M51's outer structure, but you'll have to detect it first.}
+ \footnotesize
+ \begin{columns}
+ \column{6cm} Example: Using a \alert{single exposure} SDSS image
+ with NoiseChisel (a program that is part of `GNU Astronomy
+ Utilities').
+
+ \vspace{3mm}
+ \begin{itemize}
+ \setlength\itemsep{1.5mm}
+ \item When optimized, outskirts detected down to
+ $\mathrm{S/N}=$\alert{$1/4$}, or \alert{$28.3$} mag/arcsec$^2$. By
+ default, it only reaches $\mathrm{S/N}>1/2$.
+ \item
+ Akhlaghi 2019
+ (\textcolor{blue}{\href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230}})
+ describes optimized result:
+ \begin{itemize}
+ \footnotesize
+ \item \alert{Run-time} options/configuration.
+ \item Steps \alert{before/after} NoiseChisel.
+ \end{itemize}
+ \item Deep/orange image from Watkins+2015
+ (\textcolor{blue}{\href{https://arxiv.org/abs/1501.04599}{arXiv:1501.04599}}) shown for reference.
+ \item Therefore:
+ \begin{itemize}
+ \footnotesize
+ \item Default settings not enough.
+ \item Final number not just from NoiseChisel (more software
+ involved).
+ \end{itemize}
+ \end{itemize}
+
+ \vspace{2mm}
+ \begin{tcolorbox}[boxsep=0pt,left=1mm,right=1mm,top=1mm,bottom=1mm]
+ Simply reporting in your paper that ``\emph{\alert{we used
+ NoiseChisel}}'' is \alert{not enough} to reproduce,
+ understand, or verify your result.
+ \end{tcolorbox}
+
+ \column{9cm}
+ \begin{tikzpicture}
+
+ \node[anchor=south west,inner sep=0] (image) at (0,0.362\linewidth)
+ {\includegraphics[width=0.495\linewidth]
+ {img/m51-lf.pdf}};
+ \node[anchor=south,white,font={\small}] at
+ (0.25\linewidth,0.362\linewidth) {Input image};
+
+ \node[anchor=south west,inner sep=0] (image) at
+ (0.5\linewidth,0.362\linewidth)
+ {\includegraphics[width=0.495\linewidth]
+ {img/m51-edge-default.pdf}};
+ \node[anchor=south,white,font={\small}] at
+ (0.75\linewidth,0.362\linewidth) {Default NoiseChisel};
+
+ \node[anchor=south west,inner sep=0] (image) at (0,0)
+ {\includegraphics[width=0.495\linewidth]
+ {img/m51-edge.pdf}};
+ \node[anchor=south,white,font={\small}] at
+ (0.25\linewidth,0) {Optimized NoiseChisel};
+
+ \node[anchor=south west,inner sep=0] (image) at (0.5\linewidth,0)
+ {\includegraphics[width=0.495\linewidth]
+ {img/m51-watkins-zoom.png}};
+ \node[anchor=south,black,font={\small}] at
+ (0.75\linewidth,0) {Much deeper image};
+ \end{tikzpicture}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Reproducibility crisis in the sciences/astronomy}
+ \begin{tcolorbox}[title=Snakes on a Spaceship -- An Overview of Python in Heliophysics]
+ \small ``...\alert{inadequate analysis descriptions} and loss of
+ scientific data have made scientific studies \alert{difficult}
+ or \alert{impossible} to replicate''. From Burrell+2018,
+ \arxivlink{1901.00143}.
+ \end{tcolorbox}
+ \pause
+ \begin{tcolorbox}[title=Perspectives on Reproducibility and Sustainability of Open-Source Scientific Software]
+ ``It is our interest that NASA adopt an open-code policy because
+ without it, reproducibility in computational science is
+ \alert{needlessly hampered}''. From Oishi+2018,
+ \arxivlink{1801.08200}.
+ \end{tcolorbox}
+ \pause
+ \begin{tcolorbox}[title=Schroedinger's code: source code availability and link persistence in astrophysics]
+ ``We were \alert{unable to find source code} online ... for
+ $40.4\%$ of the codes used in the research we looked at''. From
+ Allen+2018, \arxivlink{1801.02094}.
+ \end{tcolorbox}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}
+ \centering
+ \includegraphics[width=0.45\linewidth]{img/schrodinger-code.jpg}
+
+ \footnotesize Original image from \href{https://www.redbubble.com/people/seriesclothing/works/28520432-the-flash-ciscos-shirt-wanted-dead-and-alive-scr-dingers-cat}{\texttt{https://www.redbubble.com}}
+ \end{frame}
+
+
+ \begin{frame}{``Reproducibility crisis'' in the sciences? (Baker 2016, Nature 533, 452)}
+ \centering
+ \includegraphics[width=0.85\linewidth]{img/reproducibility-crisis.jpg}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}}
+ \vspace{-5mm}
+ \begin{columns}[t]
+ \column{0.5\linewidth}
+ \begin{center}
+ \large\textbf{Replicability (hardware/statistical)}
+ \rule{0.5\linewidth}{1pt}
+ \end{center}
+ \begin{itemize}
+ \setlength\itemsep{0.5em}
+ \item Involves data \alert{collection}.
+ \item Inherently includes \alert{measurement errors}\\(can
+ never be exactly reproduced).
+ \item Example: Raw telescope image/spectra.
+ \item \alert{\textbf{NOT DISCUSSED HERE.}}
+ \end{itemize}
+
+ \vspace{3.5mm}
+ \begin{center}
+ \vspace{-5mm}
+ \includegraphics[width=0.7\linewidth]{img/hale-prime-focus.jpg}\\
+ \vspace{-0.6mm}
+ \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk}
+ \end{center}
+
+ \column{0.5\linewidth}
+ \end{columns}
+ \end{frame}
+ \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}}
+ \vspace{-5mm}
+ \begin{columns}[t]
+ \column{0.5\linewidth}
+ \begin{center}
+ \large\textbf{Replicability (hardware/statistical)}
+ \rule{0.5\linewidth}{1pt}
+ \end{center}
+ \begin{itemize}
+ \setlength\itemsep{0.5em}
+ \item Involves data \alert{collection}.
+ \item Inherently includes \alert{measurement errors}\\(can
+ never be exactly reproduced).
+ \item Example: Raw telescope image/spectra.
+ \item \alert{\textbf{NOT DISCUSSED HERE.}}
+ \end{itemize}
+
+ \vspace{3.5mm}
+ \begin{center}
+ \vspace{-5mm}
+ \includegraphics[width=0.7\linewidth]{img/hale-prime-focus-marked.jpg}\\
+ \vspace{-0.6mm}
+ \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk}
+ \end{center}
+
+ \column{0.5\linewidth}
+ \end{columns}
+ \end{frame}
+ \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}}
+ \vspace{-5mm}
+ \begin{columns}[t]
+ \column{0.5\linewidth}
+ \begin{center}
+ \large\textbf{Replicability (hardware/statistical)}
+ \rule{0.5\linewidth}{1pt}
+ \end{center}
+ \begin{itemize}
+ \setlength\itemsep{0.5em}
+ \item Involves data \alert{collection}.
+ \item Inherently includes \alert{measurement errors}\\(can
+ never be exactly reproduced).
+ \item Example: Raw telescope image/spectra.
+ \item \alert{\textbf{NOT DISCUSSED HERE.}}
+ \end{itemize}
+
+ \vspace{3.5mm}
+ \begin{center}
+ \vspace{-5mm}
+ \includegraphics[width=0.7\linewidth]{img/hale-prime-focus.jpg}\\
+ \vspace{-0.6mm}
+ \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk}
+ \end{center}
+
+ \column{0.5\linewidth}
+ \begin{center}
+ \large\textbf{Reproducibility (Software/Deterministic)}
+ \rule{0.5\linewidth}{1pt}
+ \end{center}
+ \begin{itemize}
+ \setlength\itemsep{1em}
+ \item Involves data \alert{analysis}, or simulations.
+ \item Starts \alert{after} data is collected/digitized.
+ \item Example: $2+2=4$ (i.e., sum of datasets).
+ \item \textbf{\textcolor{green!50!black}{DISCUSSED HERE.}}
+ \end{itemize}
+
+ \centering
+ \vspace{3mm}
+ \includegraphics[width=0.8\linewidth]{img/digital-tunnel.jpg}\\
+ \vspace{-0.6mm}
+ \tiny \href{https://tsongas.com/newsletter_articles/the-new-electronic-version-of-the-advantage/digital-tunnel-wallpaper/}{https://tsongas.com}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+
+ %% Step-by-step slides.
+ \newcommand{\allopacity}{1}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\paperinit}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\sver}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \begin{frame}{Different package managers have different versions of software (repology.org, 2019/11/20)}
+ \begin{columns}
+ \column{7cm} \centering % declaration form; \center is only the internal opener of the center environment
+ Astropy\\
+ \includegraphics[width=2.2cm]{img/distros-astropy.pdf}
+ \column{7cm} \centering
+ GNU Astronomy Utilities (Gnuastro)\\
+ \includegraphics[width=2.7cm]{img/distros-gnuastro.pdf}
+ \end{columns}
+ \end{frame}
+ \newcommand{\srep}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\dver}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\ddver}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\confopt}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\confenv}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \begin{frame}{Example: Matplotlib (a Python visualization library) build dependencies}
+ \Wider[5em]{
+ %\vspace{5mm}
+ \begin{center}
+ \includegraphics[width=0.9\linewidth]{img/matplotlib.png}
+ \end{center}
+
+ \vspace{3mm}\tiny From ``Attributing and Referencing (Research)
+ Software: Best Practices and Outlook from Inria'' (Alliez et
+ al. 2019,
+ \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+ }
+ \end{frame}
+ \begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures)}
+ \begin{columns}
+ \column{7cm}
+ \includegraphics[width=0.9\linewidth]{img/cpu-arch-astropy.png}
+ Astropy depends on Matplotlib
+ \column{6cm}
+ \includegraphics[width=0.9\linewidth]{img/cpu-arch-gnuastro.png}
+ GNU Astronomy Utilities doesn't.
+ \end{columns}
+ \end{frame}
+ \newcommand{\containers}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\db}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\calib}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\corr}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\runord}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\runopt}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\humanerr}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\confirmbias}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\depupdate}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\coauth}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\varsinpaper}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\recordinfo}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\softcite}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \begin{frame}
+ \begin{tcolorbox}[title={Di Cosmo \& Pellegrini (2019) \small Encouraging a wider usage of software derived from research}]
+ \centering ``\textbf{Software is a hybrid} object in the world of research as it is equally a driving force (as a \alert{tool}), a \alert{result} (as proof of the existence of a solution) and an \alert{object of study} (as an artefact)''.
+ \end{tcolorbox}
+ \end{frame}
+ \newcommand{\prevchange}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \newcommand{\paperfinal}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+
+
+ %% Don't show the happy scientist or the existing containers box.
+ \let\paperinit\undefined
+ \let\allopacity\undefined
+ \let\paperfinal\undefined
+ \let\containers\undefined
+
+
+
+
+
+ \begin{frame}{Science is a tricky business}
+ \begin{center}
+ \includegraphics[width=0.9\linewidth]{img/nature-cartoon.jpg}
+ \end{center}
+
+ \vspace{-0.3cm}\hfill
+ {\tiny Image from nature.com
+ (``\href{https://www.nature.com/articles/d41586-017-07522-z}{Five
+ ways to fix statistics}'', Nov 2017)}\hspace{7mm}
+
+ \vspace{-1mm}
+ \begin{tcolorbox}[boxsep=0pt,left=1mm,right=1mm,top=1mm,bottom=1mm]
+ \small Data analysis [...] is a \alert{human
+ behavior}. Researchers who hunt hard enough will turn up a
+ result that fits statistical criteria, but their
+ \alert{discovery} will probably be a \alert{false positive}.
+
+ \hfill Five ways to fix statistics, Nature, 551, Nov 2017.
+ \end{tcolorbox}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}
+ \begin{tcolorbox}[title={Buckheit \& Donoho (1996) \small Lecture Notes in Statistics (vol 103, DOI:\textcolor{blue!10!white}{\href{https://doi.org/10.1007/978-1-4612-2544-7\_5}{10.1007/978-1-4612-2544-7\_5}})}]
+ \centering ``An \alert{article} about computational science [\emph{today: almost all sciences}] ... is not the scholarship itself, it is merely \textbf{ADVERTISING} of the \textbf{SCHOLARSHIP}.
+
+ \vspace{5mm}
+ \pause
+ The \textbf{ACTUAL SCHOLARSHIP} is the \alert{complete software development environment} and the \alert{complete set of instructions} which generated the figures.''
+ \end{tcolorbox}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Principles behind proposed solution}
+ \begin{tcolorbox}[title=Basic/simple principle:]
+ \centering Science is defined by its METHOD, \alert{not} its
+ result.
+ \end{tcolorbox}
+
+ \pause
+ \begin{itemize}
+ \item \textbf{Complete/self-contained:}
+ \begin{itemize}
+ \item \alert{Only dependency} should be \alert{POSIX} tools \textcolor{gray}{(discards Conda or Jupyter which need Python)}.
+ \pause
+ \item Must \alert{not require root} permissions \textcolor{gray}{(discards tools like Docker or Nix/Guix)}.
+ \pause
+ \item Should be \alert{non-interactive} or runnable in batch (user interaction is an incompleteness).
+ \pause
+ \item Should be usable \alert{without internet} connection.
+ \end{itemize}
+
+ \pause
+ \item \textbf{Modularity:} Parts of the project should be \alert{re-usable} in other projects.
+ \pause
+ \item \textbf{Plain text:} Project's source should be in \alert{plain-text} \textcolor{gray}{(binary formats need special software)}
+ \begin{itemize}
+ \item This includes high-level analysis.
+ \item It is easily publishable (very low volume of $\times100$KB), archivable, and parse-able.
+ \item \alert{Version control} (e.g., with Git) can track project's history.
+ \end{itemize}
+ \pause
+ \item \textbf{Minimal complexity:} Occam's razor: ``Never posit pluralities without necessity''.
+ \begin{itemize}
+ \item Avoiding the \alert{fashionable} tool of the day: tomorrow another tool will take its place!
+ \item Easier \alert{learning curve}, also doesn't create a \alert{generational gap}.
+ \item Is \alert{compatible} and \alert{extensible}.
+ \end{itemize}
+ \pause
+ \item \textbf{Verifiable inputs and outputs:} Inputs and Outputs must be \alert{automatically verified}.
+ \pause
+ \item \textbf{Free and open source software:} \alert{Free software} is essential: non-free software is not configurable, not distributable, and dependent on a non-free provider (which may discontinue it in N years).
+ \end{itemize}
+ \end{frame}
+
+
+
+
+
+ \newcommand{\focusonpackages}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \let\focusonpackages\undefined
+
+
+
+
+
+ \begin{frame}{Predefined/exact software tools}
+ \small
+ \begin{columns}
+ \column{10cm}
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt, title=Reproducibility \&
+ software]
+ \footnotesize Reproducing the environment (specific
+ \alert{software versions}, \alert{build instructions} and
+ \alert{dependencies}) is also critically important for
+ reproducibility.
+ \end{tcolorbox}
+
+ \vspace{2cm}
+
+ \begin{itemize}
+ \setlength\itemsep{0.6cm}
+ \item \emph{Containers} or \emph{Virtual Machines} are a
+ \alert{binary black box}.
+
+ \item Maneage \alert{installs fixed versions} of all
+ necessary research software and their dependencies.
+
+ \item Installs similar environment on \alert{GNU/Linux}, or
+ \alert{macOS} systems.
+
+ \item Works very much like a package manager (e.g.,
+ \alert{\texttt{apt}} or \alert{\texttt{brew}}).
+ \end{itemize}
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/version.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Predefined/exact software tools}
+ \small
+ \begin{columns}
+ \column{10cm}
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt, title=Reproducibility \&
+ software]
+ \footnotesize Reproducing the environment (specific
+ \alert{software versions}, \alert{build instructions} and
+ \alert{dependencies}) is also critically important for
+ reproducibility.
+ \end{tcolorbox}
+
+ \vspace{2cm}
+
+ \begin{itemize}
+ \setlength\itemsep{0.6cm}
+ \item \emph{Containers} or \emph{Virtual Machines} are a
+ \alert{binary black box}.
+
+ \item Maneage \alert{installs fixed versions} of all
+ necessary research software and their dependencies.
+
+ \item Installs similar environment on \alert{GNU/Linux}, or
+ \alert{macOS} systems.
+
+ \item Works very much like a package manager (e.g.,
+ \alert{\texttt{apt}} or \alert{\texttt{brew}}).
+ \end{itemize}
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/version-highlighted.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Controlled environment and build instructions}
+ \small
+ \begin{columns}
+ \column{5.5cm}
+ \includegraphics[width=0.9\linewidth]{img/env.png}
+ \column{5.5cm}
+ \includegraphics[width=0.9\linewidth]{img/build.png}
+ \end{columns}
+ \end{frame}
+
+ \begin{frame}{Controlled environment and build instructions}
+ \small
+ \begin{columns}
+ \column{5.5cm}
+ \includegraphics[width=0.9\linewidth]{img/env-highlighted.png}
+ \column{5.5cm}
+ \includegraphics[width=0.9\linewidth]{img/build-highlighted.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Example: Matplotlib (a Python visualization library) build dependencies}
+ \Wider[5em]{
+ %\vspace{5mm}
+ \begin{center}
+ \includegraphics[width=0.9\linewidth]{img/matplotlib.png}
+ \end{center}
+
+ \vspace{3mm}\tiny From ``Attributing and Referencing (Research)
+ Software: Best Practices and Outlook from Inria'' (Alliez et
+ al. 2019,
+ \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+ }
+ \end{frame}
+
+
+
+
+
+ \newcommand{\prjdir}{\textcolor{gray}{/PROJECT}} % Gray `/PROJECT' path prefix used in the library listings of the next frame.
+ \newcommand{\lcolor}[1]{\textcolor{green!80!black}{#1}} % Dark-green text; the next frame's legend uses it for "Project libraries".
+ \begin{frame}{All high-level dependencies are under control (e.g., NoiseChisel's dependencies)}
+ \scriptsize
+ \begin{columns}
+ \column{8cm}
+ \begin{center} {\large \textbf{GNU/Linux distribution}} \end{center}
+
+ \texttt{\$ ldd .local/bin/astnoisechisel}\\
+ \hspace{0.5cm}\texttt{\lcolor{libgnuastro.so.7} => \textcolor{gray}{\prjdir}/libgnuastro.so.7 (0x00007f6745f39000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libgit2.so.26} => \prjdir/libgit2.so.26 (0x00007f6745df1000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libtiff.so.5} => \prjdir/libtiff.so.5 (0x00007f6745d77000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{liblzma.so.5} => \prjdir/liblzma.so.5 (0x00007f6745d4f000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libjpeg.so.9} => \prjdir/libjpeg.so.9 (0x00007f6745d12000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libwcs.so.6} => \prjdir/libwcs.so.6 (0x00007f6745ba8000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libcfitsio.so.8} => \prjdir/libcfitsio.so.8 (0x00007f674588b000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libcurl.so.4} => \prjdir/libcurl.so.4 (0x00007f6745811000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libssl.so.1.1} => \prjdir/libssl.so.1.1 (0x00007f6745777000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libcrypto.so.1.1} => \prjdir/libcrypto.so.1.1 (0x00007f6745491000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libz.so.1} => \prjdir/libz.so.1 (0x00007f6745474000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libgsl.so.23} => \prjdir/libgsl.so.23 (0x00007f67451e3000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{libgslcblas.so.0} => \prjdir/libgslcblas.so.0 (0x00007f67451a1000)}\\
+ \hspace{0.5cm}\texttt{\lcolor{linux-vdso.so.1} (0x00007fffdcbf7000)}\\
+ \hspace{0.5cm}\texttt{\textcolor{blue}{libpthread.so.0} => /usr/lib/libpthread.so.0 (0x00007f6745006000)}\\
+ \hspace{0.5cm}\texttt{\textcolor{blue}{libm.so.6} => /usr/lib/libm.so.6 (0x00007f6745027000)}\\
+ \hspace{0.5cm}\texttt{\textcolor{blue}{libc.so.6} => /usr/lib/libc.so.6 (0x00007f6744e43000)}\\
+ \hspace{0.5cm}\texttt{\textcolor{blue}{libdl.so.2} => /usr/lib/libdl.so.2 (0x00007f6744e1e000)}\\
+ \hspace{0.5cm}\texttt{\textcolor{blue}{/lib64/ld-linux-x86-64.so.2} => /usr/lib64/ld-linux-x86-64.so.2}
+
+ \column{7.5cm}
+ \begin{center} {\large \textbf{macOS}} \end{center}
+
+ \texttt{\$ otool -L .local/bin/astnoisechisel}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libgnuastro.7.dylib} (comp ver 8.0.0, cur ver 8.0.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libgit2.26.dylib} (comp ver 26.0.0, cur ver 0.26.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libtiff.5.dylib} (comp ver 10.0.0, cur ver 10.0.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{liblzma.5.dylib} (comp ver 8.0.0, cur ver 8.4.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libjpeg.9.dylib} (comp ver 12.0.0, cur ver 12.0.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libwcs.6.2.dylib} (comp ver 6.0.0, cur ver 6.2.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libcfitsio.8.dylib} (comp ver 8.0.0, cur ver 8.3.47)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libcurl.4.dylib} (comp ver 10.0.0, cur ver 10.0.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libssl.1.1.dylib} (comp ver 1.1.0, cur ver 1.1.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libcrypto.1.1.dylib} (comp ver 1.1.0, cur ver 1.1.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libz.1.dylib} (comp ver 1.0.0, cur ver 1.2.11)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libgsl.23.dylib} (comp ver 25.0.0, cur ver 25.0.0)}\\
+ \hspace{0.5cm}\texttt{\prjdir/\lcolor{libgslcblas.0.dylib} (comp ver 1.0.0, cur ver 1.0.0)}\\
+ \hspace{0.5cm}\alert{/usr/lib/libSystem.B.dylib} (comp ver 1.0.0, cur ver 1252.50.4)
+
+ \vspace{1.1cm}
+ \end{columns}
+
+ \vspace{2mm}
+ \begin{tcolorbox}[boxsep=0pt,left=1mm,right=1mm,top=1mm,bottom=1mm]
+ \small
+
+ \lcolor{Project libraries:} High-level libraries built from
+ source for each project (note the same version in both OSs).
+
+ \textcolor{blue}{GNU C Library:} Project specific build is in progress (\url{http://savannah.nongnu.org/task/?15390}).
+
+ \alert{Closed operating system files}: We have no control over low-level non-free operating system components.
+ \end{tcolorbox}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Advantages of this build system}
+ \begin{columns}
+ \column{11cm}
+ \begin{itemize}
+ \setlength\itemsep{0.7cm}
+ \item Project runs in fixed/controlled environment: custom build
+ of \alert{Bash}, \alert{Make}, GNU Coreutils
+ (\alert{\texttt{ls}}, \alert{\texttt{cp}},
+ \alert{\texttt{mkdir}}, etc.), \alert{AWK}, or \alert{SED},
+ \alert{\LaTeX}, etc.
+ \item No need for \alert{root}/administrator \alert{permissions}
+ (on servers or super computers).
+ \item Whole system is built \alert{automatically} on any
+ Unix-like operating system \\(less than 2 hours).
+ \item Dependencies of different projects will \alert{not conflict}.
+ \item Everything in \alert{plain text} (human \& computer
+ readable/archivable).
+ \end{itemize}
+ \column{4cm}
+ \includegraphics[width=\linewidth]{img/unchained.jpg}\\
+ \tiny \url{https://natemowry2.wordpress.com}
+ \end{columns}
+ \end{frame}
+
+
+
+ \begin{frame}{Software citation automatically generated in paper (including Astropy)}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/software-cite.jpg}
+ \end{frame}
+ \begin{frame}{Software citation automatically generated in paper (including Astropy)}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/software-cite-highlighted.jpg}
+ \end{frame}
+
+
+
+
+ \begin{frame}{Software citation automatically generated in paper (only GNU Astronomy Utilities)}
+ \centering
+ \includegraphics[width=0.4\linewidth]{img/software-cite-no-py.jpg}
+ \end{frame}
+ \begin{frame}{Software citation automatically generated in paper (only GNU Astronomy Utilities)}
+ \centering
+ \includegraphics[width=0.4\linewidth]{img/software-cite-no-py-highlighted.jpg}
+ \end{frame}
+
+
+
+
+
+
+
+
+
+
+ %% Hardware/data
+ \newcommand{\focusonhardware}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \let\focusonhardware\undefined
+
+ \begin{frame}{Input data source and integrity is documented and checked}
+ \small
+ \begin{columns}
+ \column{10cm}
+ Stored information about each input file:
+ \begin{itemize}
+ \item \alert{PID} (where available).
+ \item Download \alert{URL}.
+ \item \alert{MD5}-sum to check integrity.
+ \end{itemize}
+
+ \vspace{0.75cm} All inputs are \alert{downloaded} from the given
+ PID/URL when necessary\\(during the analysis).
+
+ \vspace{0.75cm} MD5-sums are \alert{checked} to make sure the
+ download was done properly or the file is the same (hasn't
+ changed on the server/source).
+
+ \vspace{0.75cm}Example from the reproducible paper \textcolor{blue}{\href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230}}.\\
+ This paper needs three input files (two images, one catalog).
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/inputs.png}
+ \end{columns}
+ \end{frame}
+
+ \begin{frame}{Input data source and integrity is documented and checked}
+ \small
+ \begin{columns}
+ \column{10cm}
+ Stored information about each input file:
+ \begin{itemize}
+ \item \alert{PID} (where available).
+ \item Download \alert{URL}.
+ \item \alert{MD5}-sum to check integrity.
+ \end{itemize}
+
+ \vspace{0.75cm} All inputs are \alert{downloaded} from the given
+ PID/URL when necessary\\(during the analysis).
+
+ \vspace{0.75cm} MD5-sums are \alert{checked} to make sure the
+ download was done properly or the file is the same (hasn't
+ changed on the server/source).
+
+ \vspace{0.75cm}Example from the reproducible paper \textcolor{blue}{\href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230}}.\\
+ This paper needs three input files (two images, one catalog).
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/inputs-highlighted.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+
+
+
+
+
+
+ %% Analysis
+ \newcommand{\focusonrun}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \let\focusonrun\undefined
+
+
+
+
+
+ \begin{frame}{Reproducible science: Maneage is managed through a Makefile}
+ \small
+ \begin{columns}
+ \column{10cm}
+
+ All steps (downloading and analysis) are managed by Makefiles\\
+ (example from
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}):
+
+ \vspace{5mm}
+ \begin{itemize}
+ \setlength\itemsep{0.7cm}
+ \item Unlike a script which always starts from the top, a
+ Makefile \alert{starts from the end} and steps that don't
+ change will be left untouched (not remade).
+ \item A single \emph{rule} can \alert{manage any number of
+ files}.
+ \item Make can identify independent steps internally and do them
+ in \alert{parallel}.
+ \item Make was \alert{designed for complex projects} with
+ thousands of files (all major Unix-like components), so it is
+ highly evolved and efficient.
+ \item Make is a very \alert{simple} and \alert{small} language,
+ thus easy to learn with great and free documentation (for
+ example
+ \textcolor{blue}{\href{https://www.gnu.org/software/make/manual/}{GNU
+ Make's manual}}).
+ \end{itemize}
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/reproducible-makefile.png}
+ \end{columns}
+ \end{frame}
+ \begin{frame}{Reproducible science: Maneage is managed through a Makefile}
+ \small
+ \begin{columns}
+ \column{10cm}
+
+ All steps (downloading and analysis) are managed by Makefiles\\
+ (example from
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}):
+
+ \vspace{5mm}
+ \begin{itemize}
+ \setlength\itemsep{0.7cm}
+ \item Unlike a script which always starts from the top, a
+ Makefile \alert{starts from the end} and steps that don't
+ change will be left untouched (not remade).
+ \item A single \emph{rule} can \alert{manage any number of
+ files}.
+ \item Make can identify independent steps internally and do them
+ in \alert{parallel}.
+ \item Make was \alert{designed for complex projects} with
+ thousands of files (all major Unix-like components), so it is
+ highly evolved and efficient.
+ \item Make is a very \alert{simple} and \alert{small} language,
+ thus easy to learn with great and free documentation (for
+ example
+ \textcolor{blue}{\href{https://www.gnu.org/software/make/manual/}{GNU
+ Make's manual}}).
+ \end{itemize}
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/reproducible-makefile-highlighted-1.png}
+ \end{columns}
+ \end{frame}
+ \begin{frame}{Reproducible science: Maneage is managed through a Makefile}
+ \small
+ \begin{columns}
+ \column{10cm}
+
+ All steps (downloading and analysis) are managed by Makefiles\\
+ (example from
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}):
+
+ \vspace{5mm}
+ \begin{itemize}
+ \setlength\itemsep{0.7cm}
+ \item Unlike a script which always starts from the top, a
+ Makefile \alert{starts from the end} and steps that don't
+ change will be left untouched (not remade).
+ \item A single \emph{rule} can \alert{manage any number of
+ files}.
+ \item Make can identify independent steps internally and do them
+ in \alert{parallel}.
+ \item Make was \alert{designed for complex projects} with
+ thousands of files (all major Unix-like components), so it is
+ highly evolved and efficient.
+ \item Make is a very \alert{simple} and \alert{small} language,
+ thus easy to learn with great and free documentation (for
+ example
+ \textcolor{blue}{\href{https://www.gnu.org/software/make/manual/}{GNU
+ Make's manual}}).
+ \end{itemize}
+
+ \column{5cm}
+ \includegraphics[width=\linewidth]{img/reproducible-makefile-highlighted-2.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ \newcommand{\focusonpaper}{}
+ \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \let\focusonpaper\undefined
+
+
+ \begin{frame}{Values in final report/paper}
+ All analysis \alert{results} (numbers, plots, tables) written in
+ paper's PDF as \alert{\LaTeX{} macros}. They are thus
+ \alert{updated automatically} on any change.\\ Shown here is a
+ portion of the \textsf{NoiseChisel} paper and its \LaTeX{} source
+ (\textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}).
+
+ \vspace{0.4cm}
+ \includegraphics[width=\linewidth]{img/reproducible-latex.png}
+ \end{frame}
+
+ \begin{frame}{Values in final report/paper}
+ All analysis \alert{results} (numbers, plots, tables) written in
+ paper's PDF as \alert{\LaTeX{} macros}. They are thus
+ \alert{updated automatically} on any change.\\ Shown here is a
+ portion of the \textsf{NoiseChisel} paper and its \LaTeX{} source
+ (\textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}).
+
+ \vspace{0.4cm}
+ \includegraphics[width=\linewidth]{img/reproducible-latex-highlighted.png}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Analysis step results/values concatenated into a single file.}
+ All \LaTeX{} macros come from a \alert{single file}.
+ \begin{center}
+ \includegraphics[width=0.6\linewidth]{img/reproducible-macros.png}
+ \end{center}
+ \end{frame}
+ \begin{frame}{Analysis step results/values concatenated into a single file.}
+ All \LaTeX{} macros come from a \alert{single file}.
+ \begin{center}
+ \includegraphics[width=0.6\linewidth]{img/reproducible-macros-highlighted.png}
+ \end{center}
+ \end{frame}
+
+
+
+
+
+
+
+ \begin{frame}{Analysis results stored as \LaTeX{} macros}
+ The analysis scripts write/update the \LaTeX{} macro values
+ automatically.
+ \begin{center}
+ \includegraphics[width=0.6\linewidth]{img/reproducible-write-macro.png}
+ \end{center}
+ \end{frame}
+ \begin{frame}{Analysis results stored as \LaTeX{} macros}
+ The analysis scripts write/update the \LaTeX{} macro values
+ automatically.
+ \begin{center}
+ \includegraphics[width=0.6\linewidth]{img/reproducible-write-macro-highlight.png}
+ \end{center}
+ \end{frame}
+
+
+ %% Make demo.
+ \begin{frame}
+ \LARGE
+ \vspace{1cm}
+ \hfill Let's see how the analysis is managed in a hypothetical project...
+ \end{frame}
+ \makedemoslide{img/data-lineage-1.pdf}
+ {Makefiles (\texttt{*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
+ \makedemoslide{img/data-lineage-2.pdf}
+ {The ultimate purpose of the project is to produce a paper/report (in PDF).}
+ \makedemoslide{img/data-lineage-3.pdf}
+ {The narrative description, typography and references are in \texttt{paper.tex} \& \texttt{references.tex}.}
+ \makedemoslide{img/data-lineage-4.pdf}
+ {Analysis outputs (blended into the PDF as \LaTeX{} macros) come from \texttt{project.tex}.}
+ \makedemoslide{img/data-lineage-5.pdf}
+ {But analysis outputs must first be \emph{verified} (with checksums) before entering the report/paper.}
+ \makedemoslide{img/data-lineage-6.pdf}
+ {Basic project info comes from \texttt{initialize.tex}.}
+ \makedemoslide{img/data-lineage-7.pdf}
+ {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+ \makedemoslide{img/data-lineage-8.pdf}
+ {... for example the number of rows in the second input (a catalog) of the project.}
+ \makedemoslide{img/data-lineage-9.pdf}
+ {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
+ \makedemoslide{img/data-lineage-10.pdf}
+ {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
+ \makedemoslide{img/data-lineage-11.pdf}
+ {... for example the average of the numbers in \texttt{out-1b.dat}.}
+ \makedemoslide{img/data-lineage-12.pdf}
+ {But \texttt{out-1b.dat} itself depends on other files and a parameter (for example a multiple of sigma).}
+ \makedemoslide{img/data-lineage-13.pdf}
+ {\texttt{out-1a.dat} is built from a downloaded dataset.}
+ \makedemoslide{img/data-lineage-14.pdf}
+ {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
+ \makedemoslide{img/data-lineage-15.pdf}
+ {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
+ \makedemoslide{img/data-lineage-16.pdf}
+ {... for example the number of selected rows in \texttt{out-2b.dat}.}
+ \makedemoslide{img/data-lineage-17.pdf}
+ {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
+ \makedemoslide{img/data-lineage-18.pdf}
+ {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
+ \makedemoslide{img/data-lineage-19.pdf}
+ {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
+ \makedemoslide{img/data-lineage-20.pdf}
+ {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
+ \makedemoslide{img/data-lineage-21.pdf}
+ {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
+ \makedemoslide{img/data-lineage-22.pdf}
+ {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis which needs one parameter.}
+
+
+
+
+
+ \begin{frame}{\LARGE The whole project is a directed graph (codifying the data's lineage).}
+ \LARGE
+ \begin{itemize}
+ \setlength\itemsep{1cm}
+ \item Every \alert{file} (source or built) is a \alert{node} in the graph (connected to others).\\
+ {\large (The links/connections/dependencies between the nodes, defined by the Makefiles: \texttt{*.mk})}
+ \item There are two types of nodes/files:
+ \begin{itemize}
+ \LARGE
+ \setlength\itemsep{4mm}
+ \item \alert{Source} nodes (\texttt{*.conf} and \texttt{paper.tex}) only have an \alert{outward} link.
+ \item \alert{Built} files always have \alert{inward} \emph{and} {\normalsize (except \texttt{paper.pdf})} \alert{outward} link(s).
+ \end{itemize}
+ \item All built files ultimately originate from a \texttt{*.conf} file,\\
+ ... and ultimately conclude in \texttt{paper.pdf}.
+ \end{itemize}
+ \end{frame}
+
+ \begin{frame}{Benefits of using Make}
+ \Large
+ \begin{itemize}
+ \setlength\itemsep{4mm}
+ \item Make can \alert{parallelize} the analysis: \\Make knows
+ which steps are independent and will run them at the same time.\\
+ \item Make can \alert{automatically detect a change} and will
+ re-do \emph{only} the affected steps.\\ {\normalsize (for
+ example to change the multiple of sigma in a configuration
+ file to see its effect)}
+ \item Easily \alert{backtrace} any step (without needing to remember!).\\
+ {\normalsize (very useful to find problems/improvements)}
+ \item The above will speed up your work, and \alert{encourage experimentation} on methods.
+ \item Make is \alert{available} on any system: many people are \alert{already familiar} with it.
+ \item And again: it's \alert{all in plain text}!\\{\normalsize (doesn't take much space, easy to read, distribute, parse automatically, or archive)}
+ \item Recall that the project's \alert{software installation} was also managed in Make.
+ \end{itemize}
+ \end{frame}
+
+ \begin{frame}{Files organized in directories by context (here are some of the files discussed before)}
+ \centering
+ \includegraphics[width=0.85\linewidth]{img/figure-file-architecture-1.pdf}
+ \end{frame}
+
+ \begin{frame}{Files organized in directories by context (now with other project files and symbolic links)}
+ \centering
+ \includegraphics[width=0.85\linewidth]{img/figure-file-architecture-2.pdf}
+ \end{frame}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ \newcommand{\allopacity}{1}
+ \begin{frame}{All questions have an answer now (in
+ \alert{plain text}: human \& computer readable/archivable).}
+ \include{tex/project-graph} \end{frame}
+ \newcommand{\gitlogo}{}
+ \begin{frame}{All questions have an answer now (in
+ \alert{plain text}: so we can use Git to keep its history).}
+ \include{tex/project-graph}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\projinit}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\projwork}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\tempevolve}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\mergewithtemp}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\tofuture}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\githappy}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\gitverified}{}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+
+ \begin{frame}{Two recent examples (publishing Git checksum in abstract)}
+ \begin{columns}
+ \column{0.5\linewidth}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/firstpage-190911230.png}
+ \column{0.5\linewidth}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/firstpage-mnras491.png}
+ \end{columns}
+ \end{frame}
+
+ \begin{frame}{Two recent examples (publishing Git checksum in abstract)}
+ \begin{columns}
+ \column{0.5\linewidth}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/firstpage-190911230-highlighted.png}
+ \column{0.5\linewidth}
+ \centering
+ \includegraphics[width=0.8\linewidth]{img/firstpage-mnras491-highlighted.png}
+ \end{columns}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Publication of the project}
+
+ A reproducible project using Maneage will have the following
+ (\alert{plain text}) components:
+ \begin{itemize}
+ \item Makefiles.
+ \item \LaTeX{} source files.
+ \item Configuration files for software used in analysis.
+ \item Scripts/programming files (e.g., Python, Shell, AWK, C).
+ \end{itemize}
+ The \alert{volume} of the project's source will thus be
+ \alert{negligible} compared to a single figure in a paper
+ (usually $\sim100$ kilo-bytes).
+
+ \vspace{1cm} The project's pipeline (customized Maneage) can be
+ \alert{published} in
+ \begin{itemize}
+ \item \alert{arXiv}: uploaded with the \LaTeX{} source to always
+ stay with the paper \\(for example
+ \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}). The
+ file containing all macros must also be uploaded so arXiv's
+ server can easily build the \LaTeX{} source.
+ \item \alert{Zenodo}: Along with all the input datasets (many
+ Gigabytes) and software \\(for example
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}}) and given a unique DOI.
+ \end{itemize}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Project source and its execution}
+ \begin{tcolorbox}
+ Programs \textcolor{gray}{[here: Scientific projects]} must be
+ written for \alert{people to read}...
+
+ \hfill ...and only \emph{incidentally} for machines to
+ \emph{execute}.
+
+ \vspace{2mm}
+ \hfill \footnotesize Harold Abelson, Structure and Interpretation of Computer Programs
+ \end{tcolorbox}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}[t]{General outline of using this system (for example \href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230})}
+ \vspace{1cm}
+ \texttt{\$ git clone http://gitlab.com/makhlaghi/iau-symposium-355{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\
+
+ \pause
+ \vspace{1.5cm}
+ \texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\
+ \texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}}
+
+ \pause
+ \vspace{1.5cm}
+ \texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\
+ \end{frame}
+
+
+
+
+
+
+ \begin{frame}{Future prospects...}
+ \large Adoption of reproducibility by many researchers will enable
+ the following:
+
+ \vspace{1em}
+ \begin{itemize}
+ \setlength\itemsep{3mm}
+ \item A repository for education/training \textcolor{gray}{(PhD
+ students, or researchers in other fields)}.
+ \item Easy \alert{verification}/\alert{understanding} of other
+ research projects \textcolor{gray}{(when necessary)}.
+ \item Trivially \alert{test} different steps of others' work
+ \textcolor{gray}{(different configurations, software, etc.)}.
+ \item Science can progress \alert{incrementally}
+ \textcolor{gray}{(shorter papers actually building on each
+ other!)}.
+ \item \alert{Extract meta-data} after the publication of a dataset
+ \textcolor{gray}{(for future ontologies or vocabularies)}.
+ \item Applying \alert{machine learning} on reproducible research
+ projects will allow us to solve some Big Data Challenges:
+
+ \vspace{1em}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item \emph{Extract the relevant parameters automatically}.
+ \item \emph{Translate the science to enormous samples}.
+ \item \emph{Believe the results when no one will have time to
+ reproduce}.
+ \item \emph{Have confidence in results derived using machine
+ learning or AI}.
+ \end{itemize}
+ \end{itemize}
+ \end{frame}
+
+
+
+
+ \begin{frame}{RDA adoption grant (2019) to IAC for Maneage}
+ \begin{center}
+ \includegraphics[width=3cm]{img/rda.png}\hspace{1cm}
+ \includegraphics[width=1.8cm]{img/iac.png}
+ \includegraphics[width=\linewidth]{img/h2020.jpg}
+ \end{center}
+
+ \vspace{1cm} For Maneage, the \alert{IAC} is selected as
+ a \alert{Top European organization} funded to adopt RDA
+ Recommendations and Outputs.
+
+ \vspace{1cm}
+ \scriptsize
+ \begin{itemize}
+ \item Research Data Alliance was launched by the \alert{European
+ Commission}, NSF, National Institute of Standards and
+ Technology, and the Australian Government’s Department of
+ Innovation.
+ \item RDA Outputs are the technical and social infrastructure
+ solutions developed by RDA Working Groups or Interest
+ Groups that enable data sharing, exchange, and
+ interoperability.
+ \end{itemize}
+
+ \vspace{0.2cm}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Workshop on Maneage at IAC: \alert{first week of April} (March 30th to April 3rd)}
+
+ We are organizing a workshop to help interested \alert{early career researchers} adopt Maneage.
+
+ \vspace{5mm}
+ \begin{columns}
+ \column{0.25\linewidth}
+ \centering
+ \includegraphics[width=\linewidth]{img/rda-europe.png}
+ \column{0.5\linewidth}
+ \centering
+ \includegraphics[width=\linewidth]{img/workshop-shutterstock.png}\\
+ {\tiny Image from \href{https://www.shutterstock.com/es/image-vector/managers-workshop-training-manager-skills-brainstorming-1334996078}{shutterstock.com}}
+
+ \column{0.25\linewidth}
+ \includegraphics[width=0.7\linewidth]{img/iac.png}
+ \end{columns}
+
+ \vspace{7mm}
+ Please contact \alert{akhlaghi@iac.es} to join (Space is very limited: it is hands-on).
+ \end{frame}
+
+
+
+
+\begin{frame}{Existing technologies (Independent environment)}
+ \begin{itemize}
+ \setlength\itemsep{7mm}
+ \item \textbf{Virtual machines:}
+ \begin{itemize}
+ \setlength\itemsep{3mm}
+ \item Contain the \alert{full operating system}, are thus very large ($\times$Gigabytes).
+ \item In \emph{binary} format (decoding a built VM's environment is extremely hard and inaccurate).
+ \end{itemize}
+ \item \textbf{Containers:} (For example Docker or Singularity)
+ \begin{itemize}
+ \setlength\itemsep{3mm}
+ \item Similar to virtual machines, but \alert{without low-level kernel} (use host's kernel).
+ \item \alert{Will fail} as soon as kernel is no longer supported\\(for example Docker currently only supports Linux kernel 3.10 and above \alert{from 2013}).
+ \item Good solutions for software engineers (that need to \emph{reproduce a bug's environment today}).
+ \item Docker is modular, needs root privileges (not available in HPCs), Dockerfiles allow incompleteness\\(especially in the common scenario of using the operating system's package manager, see next slide)
+ \item Singularity is monolithic and thus can be very large.
+ \item In \alert{binary} format (similar to VMs, especially when OS package managers are used).
+ \end{itemize}
+ \end{itemize}
+
+ \vspace{3mm}
+In summary, they only \alert{store a built} environment (they are outputs, not good for archiving).
+
+\end{frame}
+
+
+
+
+
+\begin{frame}{Existing technologies (Package managers)}
+
+ \begin{itemize}
+ \item \textbf{Operating system package managers:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software).
+ \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible).
+ \item Older software (for example +5 years) is usually removed.
+ \end{itemize}
+ \item \textbf{Conda/Anaconda:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item Conda has build instructions for software and their dependencies.
+ \item But it doesn't go down to the C library or the lower-level components of operating system.
+ \item It is written in Python (can't be used later when current Python is deprecated).
+ \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility).
+ \end{itemize}
+ \item \textbf{Nix, or GNU Guix:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access.
+ \item Doesn't \emph{require} documentation of dependencies.
+ \end{itemize}
+ \item \textbf{Spack:} Similar to Nix/Guix but written in Python.
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Existing technologies (workflow tools)}
+ \begin{itemize}
+ \setlength\itemsep{4mm}
+ \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda.
+ \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI makes it hard to automate, and has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding.
+ \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them.
+ \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker.
+ \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker.
+ \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software.
+ \end{itemize}
+ \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
+\end{frame}
+
+
+
+
+ \begin{frame}{Summary:}
+
+ Maneage is introduced as a customizable template that will do the
+ following steps/instructions (all in simple plain text files).
+ \begin{itemize}
+ \item \alert{Automatically downloads} the necessary
+ \emph{software} and \emph{data}.
+ \item \alert{Builds} the software in a \alert{closed
+ environment}.
+ \item Runs the software on data to \alert{generate} the final
+ \alert{research results}.
+ \item A modification in one part of the analysis will only
+ result in re-doing that part, not the whole project.
+ \item Using LaTeX macros, paper's figures, tables and numbers
+ will be \alert{automatically updated} after a change in
+ analysis. Allowing the scientist to focus on the scientific
+ interpretation.
+ \item The whole project is under \alert{version control} (Git)
+ to allow easy reversion to a previous state. This
+ \alert{encourages tests/experimentation} in the analysis.
+ \item The \alert{Git commit hash} of the project source is
+ \alert{printed} in the published paper and \alert{saved on
+ output} data products. Ensuring the
+ integrity/reproducibility of the result.
+ \item \colorbox{green!30!white}{These slides are available at
+ \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.}
+ \end{itemize}
+
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt]
+ For a technical description of Maneage's implementation, as well
+ as a checklist to customize it, and tips on good practices,
+ please see this page:
+
+ \textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
+ \end{tcolorbox}
+ \end{frame}
+\end{document}
+
+
+
+
+
+% \begin{frame}{Funding to help adoption of template}
+% \begin{itemize}
+% \setlength\itemsep{5mm}
+% \item With the RDA grant, \alert{we have funding} to support the
+% travel and stay of several researchers from outside of
+% astronomy/astrophysics to the IAC \alert{for one week} to help
+% in adopting the template in their research.
+% \item Conditions:
+% \begin{itemize}
+% \setlength\itemsep{3mm}
+% \item The exact date will be set in coordination with the
+% selected researchers.
+% \item \alert{Early Career Researchers} (PhD students or Postdocs) will
+% be preferred.
+% \item Your project can be done with \alert{Free
+% Software} [1].\\ {\footnotesize (Note that non-free software are
+% by definition non-reproducible:\\ A random researcher can't
+% run and redistribute them as they wish, or study and modify
+% their source.)}
+% \item Your project can be run in \alert{Unix-like operating
+% systems}, ideally GNU/Linux distributions [2].\\(but other Unix-like OSs are also fine, for example BSD-based distributions, or macOS).
+% \end{itemize}
+% \item Please contact me by the end of November 2019 if you are
+% interested
+% (\textcolor{blue}{\href{mailto:mohammad@akhlaghi.org}{mohammad@akhlaghi.org}}).\\ {\footnotesize (If positions aren't filled by then, we will still accept letters of interest, and generally, we would be happy to help in adoption remotely. So if you are interested, please get in touch any way!)}
+% \end{itemize}
+%
+% \vspace{3mm}
+% \footnotesize
+% 1. \url{https://www.gnu.org/philosophy/free-sw.en.html}\\
+% 2. For example: Mint, Debian, Ubuntu, OpenSUSE, Manjaro, Fedora, CentOS, Redhat, and many others.
+% \end{frame}