aboutsummaryrefslogtreecommitdiff
path: root/slides-intro-short.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides-intro-short.tex')
-rw-r--r--slides-intro-short.tex168
1 files changed, 52 insertions, 116 deletions
diff --git a/slides-intro-short.tex b/slides-intro-short.tex
index 3bc3f8f..b7c551a 100644
--- a/slides-intro-short.tex
+++ b/slides-intro-short.tex
@@ -51,7 +51,7 @@
%% Set the date and institutional logos.
\date{\footnotesize\vspace{0cm}\\
- \href{https://www.rd-alliance.org/rda-global-adoption-week-15-19-june-2020}{RDA Global Adoption week}\\June 18th, 2020\\
+ IAC Scientific Coffee\\July 2nd, 2020\\
\tiny\vspace{3mm}
Most recent slides available in link below (this PDF is built from \href{http://git.maneage.org/slides-intro.git}{Git commit} \gitcommit):\\
\footnotesize\textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}\\
@@ -95,33 +95,33 @@
\usebackgroundtemplate{ } %% undeclare it
- \begin{frame}{Challenges of the RDA-WDS Publishing Data Workflows WG {\small (DOI:\href{https://doi.org/10.1007/s00799-016-0178-2}{10.1007/s00799-016-0178-2})}}
- Challenges (also relevant to researchers, not just repositories)
- \begin{itemize}
- \item \emph{Bi-directional linking}: how to \alert{link data and publications}.
- \item \emph{\alert{Software management}:} how to manage, preserve, publish and cite software?
- \item \emph{Metrics:} \alert{how often} are data used.
- \item \emph{Incentives to researchers:} how to \alert{communicate benefits} of following good practices \alert{to researchers}.
- \end{itemize}
-
- \begin{center}
- \includegraphics[width=4cm]{img/rda.png}\hspace{1cm}
- \includegraphics[width=4cm]{img/wds.jpg}
- \end{center}
-
- \ifdefined\longformat\pause\fi
-
- ``\emph{We would like to see a workflow that results in all
- \textcolor{blue!30!green}{\bf scholarly objects being connected},
- linked, citable, and persistent to allow researchers to navigate
- smoothly and to \alert{\bf enable reproducible research}. This
- includes \alert{{\bf linkages} between documentation, code, data, and
- journal articles in an integrated environment}. Furthermore,
- in the ideal workflow, all of these objects need to be
- \alert{\bf well documented} to enable other researchers (or
- citizen scientists etc) to reuse the data for new
- discoveries.}''
- \end{frame}
+% \begin{frame}{Challenges of the RDA-WDS Publishing Data Workflows WG {\small (DOI:\href{https://doi.org/10.1007/s00799-016-0178-2}{10.1007/s00799-016-0178-2})}}
+% Challenges (also relevant to researchers, not just repositories)
+% \begin{itemize}
+% \item \emph{Bi-directional linking}: how to \alert{link data and publications}.
+% \item \emph{\alert{Software management}:} how to manage, preserve, publish and cite software?
+% \item \emph{Metrics:} \alert{how often} are data used.
+% \item \emph{Incentives to researchers:} how to \alert{communicate benefits} of following good practices \alert{to researchers}.
+% \end{itemize}
+
+% \begin{center}
+% \includegraphics[width=4cm]{img/rda.png}\hspace{1cm}
+% \includegraphics[width=4cm]{img/wds.jpg}
+% \end{center}
+
+% \ifdefined\longformat\pause\fi
+
+% ``\emph{We would like to see a workflow that results in all
+% \textcolor{blue!30!green}{\bf scholarly objects being connected},
+% linked, citable, and persistent to allow researchers to navigate
+% smoothly and to \alert{\bf enable reproducible research}. This
+% includes \alert{{\bf linkages} between documentation, code, data, and
+% journal articles in an integrated environment}. Furthermore,
+% in the ideal workflow, all of these objects need to be
+% \alert{\bf well documented} to enable other researchers (or
+% citizen scientists etc) to reuse the data for new
+% discoveries.}''
+% \end{frame}
\newcommand{\allopacity}{1}
\ifdefined\longformat
@@ -752,6 +752,10 @@
\ifdefined\longformat
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\tomorrow}{1}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \newcommand{\abstractify}{1}
+ \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
\newcommand{\projinit}{}
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
\newcommand{\projwork}{}
@@ -834,22 +838,6 @@
- \begin{frame}[t]{General outline of using Maneage (for example \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})}
- \vspace{1cm}
- \texttt{\$ git clone https://gitlab.com/makhlaghi/maneage-paper{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\
-
- \ifdefined\longformat\pause\fi
- \vspace{1.5cm}
- \texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\
- \texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}}
-
- \ifdefined\longformat\pause\fi
- \vspace{1.5cm}
- \texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\
- \end{frame}
-
-
-
\ifdefined\longformat
\begin{frame}{Project source and its execution}
\begin{tcolorbox}
@@ -867,6 +855,26 @@
+
+
+ \begin{frame}[t]{Executing a Maneaged project (for example \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})}
+ \vspace{1cm}
+ \texttt{\$ git clone https://gitlab.com/makhlaghi/maneage-paper{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\
+
+ \ifdefined\longformat\pause\fi
+ \vspace{1.5cm}
+ \texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\
+ \texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}}
+
+ \ifdefined\longformat\pause\fi
+ \vspace{1.5cm}
+ \texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\
+ \end{frame}
+
+
+
+
+
\begin{frame}{Future prospects...}
\large Adoption of reproducibility by many researchers will enable
the following:
@@ -944,76 +952,4 @@
\textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
\end{tcolorbox}
\end{frame}
-
-
-\ifdefined\longformat
-\begin{frame}{Existing technologies (Independent environment)}
- \begin{itemize}
- \setlength\itemsep{7mm}
- \item \textbf{Virtual machines:}
- \begin{itemize}
- \setlength\itemsep{3mm}
- \item Contain the \alert{full operating system}, are thus very large ($\times$Gigabytes).
- \item In \emph{binary} format (decoding a built VM's environment is extremely hard and inaccurate).
- \end{itemize}
- \item \textbf{Containers:} (For example Docker or Singularity)
- \begin{itemize}
- \setlength\itemsep{3mm}
- \item Similar to virtual machines, but \alert{without low-level kernel} (use host's kernel).
- \item \alert{Will fail} as soon as kernel is no longer supported\\(for example Docker currently only supports Linux kernel 3.10 and above \alert{from 2013}).
- \item Good solutions for software engineers (that need to \emph{reproduce a bug's environment today}).
- \item Docker is modular, needs root privileges (not available in HPCs), Dockerfiles allow incompleteness\\(especially in the common scenario of using the operating system's package manager, see next slide)
- \item Singularity is monolithic and thus can be very large.
- \item In \alert{binary} format (similar to VMs, especially when OS package managers are used).
- \end{itemize}
- \end{itemize}
-
- \vspace{3mm}
-In summary, they only \alert{store a built} environment (they are outputs, not good for archiving).
-\end{frame}
-
-
-
-
-\begin{frame}{Existing technologies (Package managers)}
-
- \begin{itemize}
- \item \textbf{Operating system package managers:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software).
- \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible).
- \item Older software (for example +5 years) is usually removed.
- \end{itemize}
- \item \textbf{Conda/Anaconda:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item Conda has build instructions for software and their dependencies.
- \item But it doesn't go down to the C library or the lower-level components of the operating system.
- \item It is written in Python (can't be used later when current Python is deprecated).
- \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility).
- \end{itemize}
- \item \textbf{Nix, or GNU Guix:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access.
- \item Doesn't \emph{require} documentation of dependencies.
- \end{itemize}
- \item \textbf{Spack:} Similar to Nix/Guix but written in Python.
- \end{itemize}
-\end{frame}
-
-\begin{frame}{Existing technologies (workflow tools)}
- \begin{itemize}
- \setlength\itemsep{4mm}
- \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda.
- \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI makes it hard to automate, and it has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding.
- \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them.
- \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker.
- \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker.
- \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software.
- \end{itemize}
- \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
-\end{frame}
-\fi
\end{document}