aboutsummaryrefslogtreecommitdiff
path: root/reproducible-paper.tex
diff options
context:
space:
mode:
Diffstat (limited to 'reproducible-paper.tex')
-rw-r--r--reproducible-paper.tex182
1 files changed, 75 insertions, 107 deletions
diff --git a/reproducible-paper.tex b/reproducible-paper.tex
index 4b8003c..a8209c1 100644
--- a/reproducible-paper.tex
+++ b/reproducible-paper.tex
@@ -46,7 +46,7 @@
%% Set the date and insitutional logos.
\date{\footnotesize\vspace{0cm}\\
- \href{http://iactalks.iac.es/talks/view/1386}{Research Division Seminar}, IACTalks\\ \href{https://iac.es}{Instituto de Astrof\'isica de Canarias} (IAC),\\February 20th, 2020 \\
+ \href{https://www.rd-alliance.org/groups/exposing-data-management-plans-wg}{RDA Exposing Data Management Plans Working group}\\March 5th, 2020 \\
\tiny\vspace{3mm}
Most recent slides available in link below (this PDF is built from \href{https://gitlab.com/makhlaghi/reproducible-paper-slides}{Git commit} \gitcommit):\\
\footnotesize\textcolor{blue}{\url{http://akhlaghi.org/pdf/reproducible-paper.pdf}}\\
@@ -1270,112 +1270,6 @@
- \begin{frame}
- \vspace{1cm}
- \hfill\Large Usage in real-world instrument pipelines...
- \end{frame}
-
- \begin{frame}{AstroCat (Tarragona, Spain), pipeline written by Ra\'ul Infante-Sainz, data from Aleix Roig}
- \begin{columns}
- \column{0.5\linewidth}
- \includegraphics[width=\linewidth]{img/astrocat-telescope.jpg}
- \column{0.5\linewidth}
- \includegraphics[width=\linewidth]{img/astrocat-m101.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{IAC 80 (Teide Observatory), Ra\'ul Infante-Sainz \& Alberto Madrigal (Master student)}
- \begin{columns}
- \column{0.4\linewidth}
- \includegraphics[width=\linewidth]{img/iac80.jpg}
- \column{0.6\linewidth}
- \includegraphics[width=\linewidth]{img/iac80-df4.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{HiPERCAM (GTC), Ra\'ul Infante-Sainz and Giulia Golini (Master Student)}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/gtc.jpg}
- \includegraphics[width=0.8\linewidth]{img/gtc-hipercam.jpg}
- \column{0.6\linewidth}
- \includegraphics[width=\linewidth]{img/gtc-hipercam-udg.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{SDSS PSF, Ra\'ul Infante-Sainz et al. 2020}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/sdss.jpg}
- \includegraphics[width=0.8\linewidth]{img/sdss-ccds.jpg}
- \column{0.6\linewidth}
- %% http://www.imc-srl.com/work-in-progress/
- \includegraphics[width=\linewidth]{img/sdss-psf.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{Remember Ra\'ul's paper that was shown before?}
- \centering
- \includegraphics[width=0.7\linewidth]{img/firstpage-mnras491-highlighted.png}
- \end{frame}
-
- \begin{frame}{Subaru Telescope Hyper SuprimeCam PSF, Roberto Baena Gall\'e}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/subaru.jpg}
- \includegraphics[width=0.8\linewidth]{img/subaru-hsc.jpg}
- \column{0.6\linewidth}
- \includegraphics[width=\linewidth]{img/subaru-hsc-psf.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{OSIRIS (GTC), Ra\'ul Castellanos (Madrid), guided by Ra\'ul Infante-Sainz}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/gtc.jpg}
- \includegraphics[width=0.8\linewidth]{img/gtc-osiris.jpg}
- \column{0.6\linewidth}
- %% http://www.imc-srl.com/work-in-progress/
- \centering
- \includegraphics[width=0.6\linewidth]{img/gtc-osiris-abel2390.jpg}\\
- \includegraphics[width=0.3\linewidth]{img/work-in-progress.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{Iranian National Obs. Lens Array: Zahra Sharbaf, Hamed Altafi, Elham Saremi, Surena Fatemi}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/inola.jpg}
- \includegraphics[width=0.8\linewidth]{img/inola-team.jpg}
- \column{0.6\linewidth}
- \centering
- %% http://www.imc-srl.com/work-in-progress/
- \includegraphics[width=0.7\linewidth]{img/inola-ngc6946.jpg}\\
- \includegraphics[width=0.3\linewidth]{img/work-in-progress.jpg}
- \end{columns}
- \end{frame}
-
- \begin{frame}{Subaru Telescope Hyper SuprimeCam for the Low Surface Brightness, Mohammad Akhlaghi}
- \begin{columns}
- \column{0.4\linewidth}
- \centering
- \includegraphics[width=0.8\linewidth]{img/subaru.jpg}
- \includegraphics[width=0.8\linewidth]{img/subaru-hsc.jpg}
- \column{0.6\linewidth}
- \centering
- \includegraphics[width=0.7\linewidth]{img/subaru-hsc-dr2-star.png}\\
- \tiny{Image from HSC DR2, showing the problem of over-subtraction!}\\
- \includegraphics[width=0.3\linewidth]{img/work-in-progress.jpg}
- \end{columns}
- \end{frame}
-
-
-
\begin{frame}{Future prospects...}
\large Adoption of reproducibility by many researchers will enable
@@ -1470,6 +1364,80 @@
+\begin{frame}{Existing technologies (Independent environment)}
+ \begin{itemize}
+ \setlength\itemsep{7mm}
+ \item \textbf{Virtual machines:}
+ \begin{itemize}
+ \setlength\itemsep{3mm}
+ \item Contain the \alert{full operating system}, are thus very large ($\times$Gigabytes).
+ \item In \emph{binary} format (decoding a built VM's environment is extremely hard and inaccurate).
+ \end{itemize}
+ \item \textbf{Containers:} (For example Docker or Singularity)
+ \begin{itemize}
+ \setlength\itemsep{3mm}
+ \item Similar to virtual machines, but \alert{without low-level kernel} (use host's kernel).
+ \item \alert{Will fail} as soon as kernel is no longer supported\\(for example Docker currently only supports Linux kernel 3.10 and above \alert{from 2013}).
+ \item Good solutions for software engineers (that need to \emph{reproduce a bug's environment today}).
+ \item Docker is modular, needs root privileges (not available in HPCs), Dockerfiles allow incompleteness\\(especially in the common scenario of using the operating system's package manager, see next slide)
+ \item Singularity is monolithic and thus can be very large.
+ \item In \alert{binary} format (similar to VMs, especially when OS package managers are used).
+ \end{itemize}
+ \end{itemize}
+
+ \vspace{3mm}
+In summary, they only \alert{store a built} environment (they are outputs, not good for archiving).
+
+\end{frame}
+
+
+
+
+
+\begin{frame}{Existing technologies (Package managers)}
+
+ \begin{itemize}
+ \item \textbf{Operating system package managers:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software).
+ \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible).
+ \item Older software (for example +5 years) is usually removed.
+ \end{itemize}
+ \item \textbf{Conda/Anaconda:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item Conda has build instructions for software and their dependencies.
+ \item But it doesn't go down to the C library or the lower-level components of the operating system.
+ \item It is written in Python (can't be used later when the current Python is deprecated).
+ \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility).
+ \end{itemize}
+ \item \textbf{Nix, or GNU Guix:}
+ \begin{itemize}
+ \setlength\itemsep{2mm}
+ \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access.
+ \item Doesn't \emph{require} documentation of dependencies.
+ \end{itemize}
+ \item \textbf{Spack:} Similar to Nix/Guix but written in Python.
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Existing technologies (workflow tools)}
+ \begin{itemize}
+ \setlength\itemsep{4mm}
+ \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda.
+ \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI makes it hard to automate, and has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding.
+ \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them.
+ \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker.
+ \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker.
+ \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software.
+ \end{itemize}
+ \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
+\end{frame}
+
+
+
+
\begin{frame}{Summary:}
A fully working template/framework is introduced that will do the