diff options
Diffstat (limited to 'slides-intro-short.tex')
-rw-r--r-- | slides-intro-short.tex | 168 |
1 files changed, 52 insertions, 116 deletions
diff --git a/slides-intro-short.tex b/slides-intro-short.tex index 3bc3f8f..b7c551a 100644 --- a/slides-intro-short.tex +++ b/slides-intro-short.tex @@ -51,7 +51,7 @@ %% Set the date and insitutional logos. \date{\footnotesize\vspace{0cm}\\ - \href{https://www.rd-alliance.org/rda-global-adoption-week-15-19-june-2020}{RDA Global Adoption week}\\June 18th, 2020\\ + IAC Scientific Coffee\\July 2nd, 2020\\ \tiny\vspace{3mm} Most recent slides available in link below (this PDF is built from \href{http://git.maneage.org/slides-intro.git}{Git commit} \gitcommit):\\ \footnotesize\textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}\\ @@ -95,33 +95,33 @@ \usebackgroundtemplate{ } %% undeclare it - \begin{frame}{Challenges of the RDA-WDS Publishing Data Workflows WG {\small (DOI:\href{https://doi.org/10.1007/s00799-016-0178-2}{10.1007/s00799-016-0178-2})}} - Challenges (also relevant to researchers, not just repositories) - \begin{itemize} - \item \emph{Bi-directional linking}: how to \alert{link data and publications}. - \item \emph{\alert{Software management}:} how to manage, preserve, publish and cite software? - \item \emph{Metrics:} \alert{how often} are data used. - \item \emph{Incentives to researchers:} how to \alert{communicate benefits} of following good practices \alert{to researchers}. - \end{itemize} - - \begin{center} - \includegraphics[width=4cm]{img/rda.png}\hspace{1cm} - \includegraphics[width=4cm]{img/wds.jpg} - \end{center} - - \ifdefined\longformat\pause\fi - - ``\emph{We would like to see a workflow that results in all - \textcolor{blue!30!green}{\bf scholarly objects being connected}, - linked, citable, and persistent to allow researchers to navigate - smoothly and to \alert{\bf enable reproducible research}. This - includes \alert{{\bf linkages} between documentation, code, data, and - journal articles in an integrated environment}. 
Furthermore, - in the ideal workflow, all of these objects need to be - \alert{\bf well documented} to enable other researchers (or - citizen scientists etc) to reuse the data for new - discoveries.}'' - \end{frame} +% \begin{frame}{Challenges of the RDA-WDS Publishing Data Workflows WG {\small (DOI:\href{https://doi.org/10.1007/s00799-016-0178-2}{10.1007/s00799-016-0178-2})}} +% Challenges (also relevant to researchers, not just repositories) +% \begin{itemize} +% \item \emph{Bi-directional linking}: how to \alert{link data and publications}. +% \item \emph{\alert{Software management}:} how to manage, preserve, publish and cite software? +% \item \emph{Metrics:} \alert{how often} are data used. +% \item \emph{Incentives to researchers:} how to \alert{communicate benefits} of following good practices \alert{to researchers}. +% \end{itemize} + +% \begin{center} +% \includegraphics[width=4cm]{img/rda.png}\hspace{1cm} +% \includegraphics[width=4cm]{img/wds.jpg} +% \end{center} + +% \ifdefined\longformat\pause\fi + +% ``\emph{We would like to see a workflow that results in all +% \textcolor{blue!30!green}{\bf scholarly objects being connected}, +% linked, citable, and persistent to allow researchers to navigate +% smoothly and to \alert{\bf enable reproducible research}. This +% includes \alert{{\bf linkages} between documentation, code, data, and +% journal articles in an integrated environment}. 
Furthermore, +% in the ideal workflow, all of these objects need to be +% \alert{\bf well documented} to enable other researchers (or +% citizen scientists etc) to reuse the data for new +% discoveries.}'' +% \end{frame} \newcommand{\allopacity}{1} \ifdefined\longformat @@ -752,6 +752,10 @@ \ifdefined\longformat \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame} + \newcommand{\tomorrow}{1} + \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame} + \newcommand{\abstractify}{1} + \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame} \newcommand{\projinit}{} \begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame} \newcommand{\projwork}{} @@ -834,22 +838,6 @@ - \begin{frame}[t]{General outline of using Maneage (for example \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})} - \vspace{1cm} - \texttt{\$ git clone https://gitlab.com/makhlaghi/maneage-paper{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\ - - \ifdefined\longformat\pause\fi - \vspace{1.5cm} - \texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\ - \texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}} - - \ifdefined\longformat\pause\fi - \vspace{1.5cm} - \texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\ - \end{frame} - - - \ifdefined\longformat \begin{frame}{Project source and its execution} \begin{tcolorbox} @@ -867,6 +855,26 @@ + + + \begin{frame}[t]{Executing a Maneaged project (for example \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})} + \vspace{1cm} + \texttt{\$ git clone https://gitlab.com/makhlaghi/maneage-paper{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\ + + 
\ifdefined\longformat\pause\fi + \vspace{1.5cm} + \texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\ + \texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}} + + \ifdefined\longformat\pause\fi + \vspace{1.5cm} + \texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\ + \end{frame} + + + + + \begin{frame}{Future prospects...} \large Adoption of reproducibility by many researchers will enable the following: @@ -944,76 +952,4 @@ \textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}} \end{tcolorbox} \end{frame} - - -\ifdefined\longformat -\begin{frame}{Existing technologies (Independent environment)} - \begin{itemize} - \setlength\itemsep{7mm} - \item \textbf{Virtual machines:} - \begin{itemize} - \setlength\itemsep{3mm} - \item Contain the \alert{full operating system}, are thus very large ($\times$Gigabytes). - \item In \emph{binary} format (decoding a built VM's environment is extremely hard and inaccurate). - \end{itemize} - \item \textbf{Containers:} (For example Docker or Singularity) - \begin{itemize} - \setlength\itemsep{3mm} - \item Similar to virtual machines, but \alert{without low-level kernel} (use host's kernel). - \item \alert{Will fail} as soon as kernel is no longer supported\\(for example Docker currently only supports Linux kernel 3.10 and above \alert{from 2013}). - \item Good solutions for software engineers (that need to \emph{reproduce a bug's environment today}). - \item Docker is modular, needs root previlages (not available in HPCs), Dockerfiles allow incompleteness\\(especially in the common scenario of using the operating system's package manager, see next slide) - \item Singularity is monolithic and thus can be very large. 
- \item In \alert{binary} format (similar to VMs, especially when OS package managers are used). - \end{itemize} - \end{itemize} - - \vspace{3mm} -In summary, they only \alert{store a built} environment (they are outputs, not good for archiving). -\end{frame} - - - - -\begin{frame}{Existing technologies (Package managers)} - - \begin{itemize} - \item \textbf{Operating system package managers:} - \begin{itemize} - \setlength\itemsep{2mm} - \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software). - \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible). - \item Older software (for example +5 years) is usually removed. - \end{itemize} - \item \textbf{Conda/Anaconda:} - \begin{itemize} - \setlength\itemsep{2mm} - \item Conda has build instructions for software and their dependencies. - \item But it doesn't go down to the C library or the lower-level components of the operating system. - \item It is written in Python (can't be used later when current Python is deprecated). - \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility). - \end{itemize} - \item \textbf{Nix, or GNU Guix:} - \begin{itemize} - \setlength\itemsep{2mm} - \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access. - \item Doesn't \emph{require} documentation of dependencies. - \end{itemize} - \item \textbf{Spack:} Similar to Nix/Guix but written in Python. 
- \end{itemize} -\end{frame} - -\begin{frame}{Existing technologies (workflow tools)} - \begin{itemize} - \setlength\itemsep{4mm} - \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda. - \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI makes it hard to automate, and has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding. - \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them. - \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker. - \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker. - \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software. - \end{itemize} - \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}. -\end{frame} -\fi \end{document} |