aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--img/cpu-arch-astropy.pngbin506421 -> 240440 bytes
-rw-r--r--img/cpu-arch-gnuastro.pngbin460828 -> 220961 bytes
-rw-r--r--img/distros-astropy.pdfbin40387 -> 52228 bytes
-rw-r--r--img/distros-gnuastro.pdfbin61510 -> 75718 bytes
-rw-r--r--img/maneage-paper.pngbin0 -> 322232 bytes
-rw-r--r--img/maneage-webpage.pngbin0 -> 111125 bytes
-rw-r--r--slides-intro.tex273
7 files changed, 152 insertions, 121 deletions
diff --git a/img/cpu-arch-astropy.png b/img/cpu-arch-astropy.png
index 28f26fb..d13e4df 100644
--- a/img/cpu-arch-astropy.png
+++ b/img/cpu-arch-astropy.png
Binary files differ
diff --git a/img/cpu-arch-gnuastro.png b/img/cpu-arch-gnuastro.png
index f222380..adbdd2f 100644
--- a/img/cpu-arch-gnuastro.png
+++ b/img/cpu-arch-gnuastro.png
Binary files differ
diff --git a/img/distros-astropy.pdf b/img/distros-astropy.pdf
index 3cbea3f..db82659 100644
--- a/img/distros-astropy.pdf
+++ b/img/distros-astropy.pdf
Binary files differ
diff --git a/img/distros-gnuastro.pdf b/img/distros-gnuastro.pdf
index 2ada969..0c083fd 100644
--- a/img/distros-gnuastro.pdf
+++ b/img/distros-gnuastro.pdf
Binary files differ
diff --git a/img/maneage-paper.png b/img/maneage-paper.png
new file mode 100644
index 0000000..266133e
--- /dev/null
+++ b/img/maneage-paper.png
Binary files differ
diff --git a/img/maneage-webpage.png b/img/maneage-webpage.png
new file mode 100644
index 0000000..a262fd4
--- /dev/null
+++ b/img/maneage-webpage.png
Binary files differ
diff --git a/slides-intro.tex b/slides-intro.tex
index 8a22ccb..35d71cc 100644
--- a/slides-intro.tex
+++ b/slides-intro.tex
@@ -35,7 +35,7 @@
%% Set the title
\title{\huge\textbf{BIG} Data, \textbf{BIG} responsibility
- \\\vspace{2mm} \large Maneage: \emph{Man}aging data lin\emph{eage} for long-term and archivable reproducibility \\\vspace{1mm} \footnotesize (\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}, \href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860})}
+ \\\vspace{2mm} \large Maneage: \emph{Man}aging data lin\emph{eage} for long-term and archivable reproducibility \\\vspace{1mm} \footnotesize (Published in CiSE 23 (3), pp 82-91: \textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860}}, \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}})}
%% Set the author
\author{\\
@@ -46,8 +46,8 @@
%% Set the date and institutional logos.
\date{\footnotesize\vspace{-5mm}\\
- \textcolor{black}{SoftwareHeritage 5th Anniversary}\\
- \textcolor{black}{November 30th, 2021 (Inria, Paris)} \\
+ \textcolor{black}{S\'eminaires LERMA}\\
+ \textcolor{black}{December 2nd, 2021 (Paris Observatory)} \\
\tiny\vspace{9mm}
Most recent slides available in link below (this PDF is built from \href{http://git.maneage.org/slides-intro.git}{Git commit} \gitcommit):\\
\footnotesize\textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}\\
@@ -90,9 +90,6 @@
\end{frame}
-
-
-
\begin{frame}{Let's start with this nice image of the Whirlpool galaxy (M51): \small{\url{https://i.redd.it/jfqgpqg0hfk11.jpg}}}
\begin{center}
\includegraphics[width=0.8\linewidth]{img/m51-amateur.jpg}
@@ -239,6 +236,62 @@ for computational reproducibility]
+ \begin{frame}{Our solution: CiSE 23 (3), pp 82-91: \textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860}}, \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}}
+ \begin{columns}
+ \column{0.4\linewidth}
+ \includegraphics[width=\linewidth]{img/maneage-paper.png}
+ \column{0.6\linewidth}
+ \includegraphics[width=\linewidth]{img/maneage-webpage.png}
+ \begin{center}
+ \huge{https://maneage.org}
+ \end{center}
+ \end{columns}
+ \end{frame}
+
+
+
+ \begin{frame}{Recognition 1: RDA adoption grant (2019) to IAC for Maneage}
+ \begin{center}
+ \includegraphics[width=3cm]{img/rda.png}\hspace{1cm}
+ \includegraphics[width=1.8cm]{img/iac.png}
+ \includegraphics[width=\linewidth]{img/h2020.jpg}
+ \end{center}
+
+ \vspace{1cm} For Maneage, the \alert{IAC} is selected as
+ a \alert{Top European organization} funded to adopt RDA
+ Recommendations and Outputs.
+
+ \vspace{1cm}
+ \scriptsize
+ \begin{itemize}
+ \item Research Data Alliance was launched by the \alert{European
+ Commission}, NSF, National Institute of Standards and
+ Technology, and the Australian Government’s Department of
+ Innovation.
+ \item RDA Outputs are the technical and social infrastructure
+ solutions developed by RDA Working Groups or Interest
+ Groups that enable data sharing, exchange, and
+ interoperability.
+ \end{itemize}
+
+ \vspace{0.2cm}
+ \end{frame}
+
+
+
+
+
+ \begin{frame}{Recognition 2: ``News and Views'' in Nature Astronomy (\textcolor{blue}{\href{https://doi.org/10.1038/s41550-021-01402-3}{DOI:10.1038/s41550-021-01402-3}})}
+ \begin{center}
+ \includegraphics[width=0.8\linewidth]{img/nature-astronomy.png}
+ \end{center}
+
+ \vspace{-2mm}
+ \footnotesize Free-to-read link: \textcolor{blue}{\url{https://rdcu.be/cmYVx}}
+ \end{frame}
+
+
+
\begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}}
@@ -355,14 +408,15 @@ for computational reproducibility]
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
\newcommand{\sver}{}
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
- \begin{frame}{Different package managers have different versions of software (repology.org, 2019/11/20)}
+ \begin{frame}[t]{Different package managers have different versions of software (repology.org, 2021/12/02)}
\begin{columns}
\column{7cm} \center
Astropy\\
- \includegraphics[width=2.2cm]{img/distros-astropy.pdf}
+ \includegraphics[width=2.8cm]{img/distros-astropy.pdf}
\column{7cm} \center
GNU Astronomy Utilities (Gnuastro)\\
- \includegraphics[width=2.7cm]{img/distros-gnuastro.pdf}
+ \includegraphics[trim={0 8cm 0 0}, clip, width=2.9cm]{img/distros-gnuastro.pdf}
+ \includegraphics[trim={0 0 0 16cm}, clip, width=2.9cm]{img/distros-gnuastro.pdf}
\end{columns}
\end{frame}
\newcommand{\srep}{}
@@ -387,13 +441,13 @@ for computational reproducibility]
al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}).
}
\end{frame}
- \begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures)}
+ \begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures), retrieved from Debian on 2021/12/02}
\begin{columns}
\column{7cm}
- \includegraphics[width=0.9\linewidth]{img/cpu-arch-astropy.png}
+ \includegraphics[width=\linewidth]{img/cpu-arch-astropy.png}
Astropy depends on Matplotlib
\column{6cm}
- \includegraphics[width=0.9\linewidth]{img/cpu-arch-gnuastro.png}
+ \includegraphics[width=1.05\linewidth]{img/cpu-arch-gnuastro.png}
GNU Astronomy Utilities doesn't.
\end{columns}
\end{frame}
@@ -1237,6 +1291,7 @@ for computational reproducibility]
+
\begin{frame}{Publication of the project}
A reproducible project using Maneage will have the following
@@ -1251,19 +1306,25 @@ for computational reproducibility]
\alert{negligible} compared to a single figure in a paper
(usually $\sim100$ kilo-bytes).
+ \pause
+
\vspace{7mm} The project's pipeline (customized Maneage) can be
\alert{published} in
\begin{itemize}
\item \alert{arXiv}: uploaded with the \LaTeX{} source to always
stay with the paper \\(for example
- \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}). The
- file containing all macros must also be uploaded so arXiv's
- server can easily build the \LaTeX{} source.
- \item \alert{Software Heritage} which is a long-term archival repository for source code, providing permanent links to cite any part of the code.
- For example see the \textcolor{blue}{\href{https://archive.softwareheritage.org/browse/origin/directory/?origin_url=https://gitlab.com/makhlaghi/maneage-paper.git}{Maneage paper's source there}}.
+ \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}} or \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}).
\item \alert{Zenodo}: Along with all the input datasets (many
Gigabytes) and software \\(for example
- \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}}) and given a unique DOI.
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3872247}{zenodo.3872247}}) and given a unique DOI.
+ \begin{itemize}
+ \item ... and put links to data in paper! See ending of caption of Figure 1 in the \textcolor{blue}{\href{https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9403875}{Maneage paper}}.
+ \end{itemize}
+ \item \alert{Software Heritage}: to archive the full version-controlled history of the project.\\(for example
+ {\small \textcolor{blue}{\href{https://archive.softwareheritage.org/swh:1:dir:33fea87068c1612daf011f161b97787b9a0df39f;origin=http://git.maneage.org/paper-concept.git/;visit=swh:1:snp:89af43c4b076a17d9298299f224247038af355ea;anchor=swh:1:rev:313db0b04bd3499f83d9e79fd7e92578cd367c2b}{swh:1:dir:33fea87068c1612daf011f161b97787b9a0df39f}}})
+ \begin{itemize}
+ \item ... and put links to exact parts of the code! See caption of Listing 1 in the \textcolor{blue}{\href{https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9403875}{Maneage paper}}.
+ \end{itemize}
\end{itemize}
\end{frame}
@@ -1344,32 +1405,44 @@ for computational reproducibility]
- \begin{frame}{RDA adoption grant (2019) to IAC for Maneage}
- \begin{center}
- \includegraphics[width=3cm]{img/rda.png}\hspace{1cm}
- \includegraphics[width=1.8cm]{img/iac.png}
- \includegraphics[width=\linewidth]{img/h2020.jpg}
- \end{center}
- \vspace{1cm} For Maneage, the \alert{IAC} is selected as
- a \alert{Top European organization} funded to adopt RDA
- Recommendations and Outputs.
+ \begin{frame}{Summary:}
- \vspace{1cm}
- \scriptsize
- \begin{itemize}
- \item Research Data Alliance was launched by the \alert{European
- Commission}, NSF, National Institute of Standards and
- Technology, and the Australian Government’s Department of
- Innovation.
- \item RDA Outputs are the technical and social infrastructure
- solutions developed by RDA Working Groups or Interest
- Groups that enable data sharing, exchange, and
- interoperability.
- \end{itemize}
+ Maneage is introduced as a customizable template that will do the
+ following steps/instructions (all in simple plain text files).
+ \begin{itemize}
+ \item \alert{Automatically downloads} the necessary
+ \emph{software} and \emph{data}.
+ \item \alert{Builds} the software in a \alert{closed
+ environment}.
+ \item Runs the software on data to \alert{generate} the final
+ \alert{research results}.
+ \item Only parts affected by a modification are re-done.
+ \item Using LaTeX macros, paper's figures, tables and numbers
+ will be \alert{Automatically updated}.
+ \item The whole project is under \alert{version control} (Git)
+ \alert{encouraging tests/experimentation}.
+ \item The \alert{Git commit hash} of the project source, is
+ \alert{printed} in the paper and \alert{on output} data
+ products.
+ \item \colorbox{green!30!white}{These slides are available at
+ \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.}
+ \end{itemize}
- \vspace{0.2cm}
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt]
+ For a technical description of Maneage's implementation, as well
+ as a checklist to customize it, and tips on good practices,
+ please see this page:
+
+ \textcolor{blue}{\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
+
+ \vspace{3mm}
+ \hfill \colorbox{green!30!white}{Feel free to contact me: \textcolor{blue}{\large{\url{mohammad@akhlaghi.org}}}}
+ \end{tcolorbox}
\end{frame}
+\end{document}
+
@@ -1405,91 +1478,49 @@ In summary, they only \alert{store a built} environment (they are outputs, not g
-\begin{frame}{Existing technologies (Package managers)}
-
- \begin{itemize}
- \item \textbf{Operating system package managers:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software).
- \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible).
- \item Older software (for example +5 years) is usually removed.
- \end{itemize}
- \item \textbf{Conda/Anaconda:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item Conda has build instructions for software and their dependencies.
- \item But it doesn't go down to the C library or the lower-level components of operating system.
- \item It is written in Python (can't be used later when current Python is depreciated).
- \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility).
- \end{itemize}
- \item \textbf{Nix, or GNU Guix:}
- \begin{itemize}
- \setlength\itemsep{2mm}
- \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access.
- \item Doesn't \emph{require} documentation of dependencies.
- \end{itemize}
- \item \textbf{Spack:} Similar to Nix/Guix but written in Python.
- \end{itemize}
-\end{frame}
-
-\begin{frame}{Existing technologies (workflow tools)}
- \begin{itemize}
- \setlength\itemsep{4mm}
- \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda.
- \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI make it hard to automate, and has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding.
- \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them.
- \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker.
- \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker.
- \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software.
- \end{itemize}
- \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
+%\begin{frame}{Existing technologies (Package managers)}
+%
+% \begin{itemize}
+% \item \textbf{Operating system package managers:}
+% \begin{itemize}
+% \setlength\itemsep{2mm}
+% \item For example \texttt{apt} or \texttt{yum} for Debian-based and RedHat-based GNU/Linux operating systems\\(the most common way to install software).
+% \item Tightly intertwined with the operating system's components\\(arbitrary control of software versions is not easily possible).
+% \item Older software (for example +5 years) is usually removed.
+% \end{itemize}
+% \item \textbf{Conda/Anaconda:}
+% \begin{itemize}
+% \setlength\itemsep{2mm}
+% \item Conda has build instructions for software and their dependencies.
+% \item But it doesn't go down to the C library or the lower-level components of operating system.
+% \item It is written in Python (can't be used later when current Python is deprecated).
+% \item Authors of Uhse+2019\footnote{\url{http://dx.doi.org/10.1002/cppb.20097}} report\footnote{\url{https://github.com/conda-forge/conda-forge.github.io/issues/787}} that their Conda environment breaks roughly every 3 months\\(Conda environments need to be updated to be used later! Breaking reproducibility).
+% \end{itemize}
+% \item \textbf{Nix, or GNU Guix:}
+% \begin{itemize}
+% \setlength\itemsep{2mm}
+% \item Deliver perfectly reproducible builds (bit-wise reproducibility of software), needs root access.
+% \item Doesn't \emph{require} documentation of dependencies.
+% \end{itemize}
+% \item \textbf{Spack:} Similar to Nix/Guix but written in Python.
+% \end{itemize}
+%\end{frame}
+%
+%\begin{frame}{Existing technologies (workflow tools)}
+% \begin{itemize}
+% \setlength\itemsep{4mm}
+% \item \textbf{Binder:} (\url{https://mybinder.org}) Docker+Conda.
+% \item \textbf{Galaxy:} (\url{https://galaxyproject.org}) A web-based user interface, primarily designed for genomics. The GUI makes it hard to automate, and has too many dependencies. Very similar to GenePattern (2008 to 2017): with +40,000 users and $\sim4000$ jobs running per week, but cut due to funding.
+% \item \textbf{Sciunit:} (\url{https://sciunit.run}) Parses program binaries to try to infer their dependencies and copy them.
+% \item \textbf{Popper:} (\url{https://falsifiable.us}), HCL (previously used by GitHub Actions) + Conda + Docker.
+% \item \textbf{WholeTale:} (\url{https://wholetale.org}) Jupyter + Conda + Docker.
+% \item \textbf{Image Processing On Line (IPOL) journal:} The best example of publishing algorithms/methods I have seen, only useful for very basic/low-level software.
+% \end{itemize}
+% \alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
\end{frame}
-
- \begin{frame}{Summary:}
-
- Maneage is introduced as a customizable template that will do the
- following steps/instructions (all in simple plain text files).
- \begin{itemize}
- \item \alert{Automatically downloads} the necessary
- \emph{software} and \emph{data}.
- \item \alert{Builds} the software in a \alert{closed
- environment}.
- \item Runs the software on data to \alert{generate} the final
- \alert{research results}.
- \item Only parts affected by a modifcation are re-done.
- \item Using LaTeX macros, paper's figures, tables and numbers
- will be \alert{Automatically updated}.
- \item The whole project is under \alert{version control} (Git)
- \alert{encouraging tests/experimentation}.
- \item The \alert{Git commit hash} of the project source, is
- \alert{printed} in the paper and \alert{on output} data
- products.
- \item \colorbox{green!30!white}{These slides are available at
- \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.}
- \end{itemize}
-
- \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
- top=1pt, bottom=1pt]
- For a technical description of Maneage's implementation, as well
- as a checklist to customize it, and tips on good practices,
- please see this page:
-
- \textcolor{blue}{\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
-
- \vspace{3mm}
- \hfill \colorbox{green!30!white}{Feel free to contact me: \textcolor{blue}{\large{\url{mohammad@akhlaghi.org}}}}
- \end{tcolorbox}
- \end{frame}
-\end{document}
-
-
-
-
-
% \begin{frame}{Funding to help adoption of template}
% \begin{itemize}
% \setlength\itemsep{5mm}