diff options
-rw-r--r-- | img/dependencies-conda-initial.png | bin | 0 -> 128310 bytes | |||
-rw-r--r-- | img/dependencies-jupyter.png | bin | 0 -> 188646 bytes | |||
-rw-r--r-- | img/filters-lsst.png | bin | 0 -> 41813 bytes | |||
-rw-r--r-- | img/minijpas-web.png | bin | 0 -> 1786685 bytes | |||
-rw-r--r-- | img/oaj.jpg | bin | 0 -> 43544 bytes | |||
-rw-r--r-- | slides-intro-short.tex | 422 |
6 files changed, 329 insertions, 93 deletions
diff --git a/img/dependencies-conda-initial.png b/img/dependencies-conda-initial.png Binary files differnew file mode 100644 index 0000000..739260b --- /dev/null +++ b/img/dependencies-conda-initial.png diff --git a/img/dependencies-jupyter.png b/img/dependencies-jupyter.png Binary files differnew file mode 100644 index 0000000..19a4f9c --- /dev/null +++ b/img/dependencies-jupyter.png diff --git a/img/filters-lsst.png b/img/filters-lsst.png Binary files differnew file mode 100644 index 0000000..0376c7f --- /dev/null +++ b/img/filters-lsst.png diff --git a/img/minijpas-web.png b/img/minijpas-web.png Binary files differnew file mode 100644 index 0000000..b391240 --- /dev/null +++ b/img/minijpas-web.png diff --git a/img/oaj.jpg b/img/oaj.jpg Binary files differnew file mode 100644 index 0000000..a065eb4 --- /dev/null +++ b/img/oaj.jpg diff --git a/slides-intro-short.tex b/slides-intro-short.tex index 70f1a52..6707458 100644 --- a/slides-intro-short.tex +++ b/slides-intro-short.tex @@ -1,6 +1,6 @@ % LaTeX source of slides on reproducible paper. 
% -% Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +% Copyright (C) 2020-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> % % This LaTeX source is free software: you can redistribute it and/or % modify it under the terms of the GNU General Public License as @@ -38,7 +38,7 @@ %% Set the title \title{\huge\textbf{BIG} Data, \textbf{BIG} responsibility - \\\vspace{2mm} \large Maneage: \emph{Man}aging data lin\emph{eage} for long-term and archivable reproducibility \\\vspace{1mm} \footnotesize (Published in CiSE 23 (3), pp 82-91: \textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860}}, \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}})} + \\\vspace{2mm} \large Maneage: Managing data lineage for long-term and archivable reproducibility \\\vspace{1mm} \footnotesize (Published in CiSE 23 (3), pp 82-91: \textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860}}, \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}})} %% Set the author \author{\\ @@ -49,8 +49,8 @@ %% Set the date and insitutional logos. 
\date{\footnotesize\vspace{-5mm}\\ - \textcolor{black}{SoftwareHeritage 5th Anniversary}\\ - \textcolor{black}{November 30th, 2021 (Inria, Paris)} \\ + \textcolor{black}{\href{https://www.eso.org/sci/meetings/2022/REPRODUCIBILITY2022.html}{Reproducibility and Open Science in Astronomy}}\\ + \textcolor{black}{May 12th, 2022 (ESO, virtual)} \\ \tiny\vspace{9mm} Most recent slides available in link below (this PDF is built from \href{http://git.maneage.org/slides-intro.git}{Git commit} \gitcommit):\\ \footnotesize\textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}\\ @@ -94,6 +94,40 @@ + + %% Introduction to OAJ and J-PAS + \begin{frame}{Our main project: \textbf{J-PAS} with Observatorio Astrofísico de Javalambre (OAJ)} + J-PAS will observe the northern sky in \alert{56 medium-band filters} ($\sim14$nm): + + \begin{center} + \includegraphics[width=0.9\linewidth]{img/oaj.jpg} + \end{center} + \end{frame} + + \begin{frame}{LSST filters: 6 {\footnotesize(image from \href{https://speclite.readthedocs.io/en/latest/filters.html}{speclite docs})}:} + \begin{center} + \vspace{-3mm} + \includegraphics[width=0.8\linewidth]{img/filters-lsst.png} + \end{center} + \end{frame} + + \begin{frame}{J-PAS filters: 56 (Bonoli+2021: \href{https://ui.adsabs.harvard.edu/abs/2021A\%26A...653A..31B}{2021A\&A...653A..31B})} + \begin{center} + \includegraphics[width=\linewidth]{img/filters-jpas.pdf} + \end{center} + \end{frame} + + \begin{frame}{Result: photo-\alert{spectra} of \alert{every pixel} of the non-Galactic northern sky (like an IFU)!} + \url{http://archive.cefca.es/catalogues/minijpas-pdr201912/navigator.html} + + \vspace{3mm} + \includegraphics[width=\linewidth]{img/minijpas-web.png} + \end{frame} + + + + + \newcommand{\allopacity}{1} \ifdefined\longformat \begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame} @@ -169,6 +203,274 @@ + \begin{frame}{Notebooks are not long-term solutions {\small (see 
appendices of Akhlaghi+2021: \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})}} + \begin{columns} + \column{0.4\linewidth} + \includegraphics[width=\linewidth]{img/dependencies-conda-initial.png} + \column{0.4\linewidth} + \includegraphics[width=0.9\linewidth]{img/dependencies-jupyter.png} + \column{0.2\linewidth} + Results from run on May 10th, 2022: + + \pause + \vspace{7mm} + Conda setup:\\\alert{39 dependencies} + + \pause + \vspace{7mm} + Jupyter (with Pip):\\\alert{61 dependencies} + + \pause + \vspace{7mm} + Web browser has more dependencies; with fluid/\alert{evolving} web technologies. + + \pause + \vspace{7mm} + They can contain \alert{binary} components. + \end{columns} + \end{frame} + + + + + + \begin{frame}{The dependency tree (Matplotlib is \emph{only one} dependency of Jupyter)} + \Wider[5em]{ + %\vspace{5mm} + \begin{center} + \includegraphics[width=0.9\linewidth]{img/matplotlib.png} + \end{center} + + \vspace{3mm}\tiny From ``Attributing and Referencing (Research) + Software: Best Practices and Outlook from Inria'' (Alliez et + al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}). + } + \end{frame} + + + + + \begin{frame}{Are containers the solution? Yes, but ... for the short term} + \pause + \begin{itemize} + \setlength\itemsep{5mm} + \item Containers are \alert{large} (many giga-bytes) + \begin{itemize} + \setlength\itemsep{3mm} + \pause + \item \alert{Expensive} to archive! + \pause + \item Example: \textcolor{blue}{\href{https://is.ieis.tue.nl/staff/pvgorp/share}{SHARE}} (enabling remote connection to Virtual machines with project environment): + \begin{itemize} + \setlength\itemsep{2mm} + \item \alert{2nd place} in Elsevier's ``Executable paper grand challenge'' of 2011. + \item SHARE's image repository was taken offline in 2019! 
+ \item Even the challenge webpage is no longer available: \textcolor{blue}{\href{http://www.executablepapers.com}{http://www.executablepapers.com}} + \end{itemize} + \end{itemize} + \pause + \item Containers are \alert{binary} (tailored to certain kernels+CPUs) + \begin{itemize} + \setlength\itemsep{3mm} + \pause + \item Only guarantee the Long Term Release kernels. + \begin{itemize} + \setlength\itemsep{2mm} + \item Become un-readable, multi-gigabyte binary blobs in $\sim10$ years! + \item Even if you store them on Zenodo! + \end{itemize} + \pause + \item Only on common CPU architectures. + \end{itemize} + \pause + \item Containers \alert{themselves} are \alert{hard to reproduce}. + \begin{itemize} + \item Example: \textcolor{blue}{\href{https://ui.adsabs.harvard.edu/abs/2020CSE....22a.102M}{2020CSE....22a.102M}} use `\texttt{FROM ubuntu:16.04}', but if run today, \textcolor{blue}{\href{https://partner-images.canonical.com/core/xenial}{images are from 2021}}. + \end{itemize} + \end{itemize} + \end{frame} + + + + + + \begin{frame} + \Large For \alert{longevity issues} with Jupyter, Conda, Containers, etc., see... + + \vspace{5mm} + \hfill ... 
the appendices in \textcolor{blue}{\href{https://arxiv.org/pdf/2006.03018.pdf}{arXiv:2006.03018}} + \end{frame} + + + + + + \begin{frame}{Our solution: CiSE 23 (3), pp 82-91: \textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2021.3072860}{DOI:10.1109/MCSE.2021.3072860}}, \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}} + \begin{columns} + \column{0.4\linewidth} + \includegraphics[width=\linewidth]{img/maneage-paper.png} + \column{0.6\linewidth} + \includegraphics[width=\linewidth]{img/maneage-webpage.png} + \begin{center} + \huge{https://maneage.org} + \end{center} + \end{columns} + \end{frame} + + + + + + \begin{frame}{Recognition 1: RDA adoption grant (2019) to IAC for Maneage} + \begin{center} + \includegraphics[width=3cm]{img/rda.png}\hspace{1cm} + \includegraphics[width=1.8cm]{img/iac.png} + \includegraphics[width=\linewidth]{img/h2020.jpg} + \end{center} + + \vspace{1cm} For Maneage, the \alert{IAC} is selected as + a \alert{Top European organization} funded to adopt RDA + Recommendations and Outputs. + + \vspace{1cm} + \scriptsize + \begin{itemize} + \item Research Data Alliance was launched by the \alert{European + Commission}, NSF, National Institute of Standards and + Technology, and the Australian Government’s Department of + Innovation. + \item RDA Outputs are the technical and social infrastructure + solutions developed by RDA Working Groups or Interest + Groups that enable data sharing, exchange, and + interoperability. 
+ \end{itemize} + + \vspace{0.2cm} + \end{frame} + + + + + + \begin{frame}{Recognition 2: ``News and Views'' in Nature Astronomy (\textcolor{blue}{\href{https://doi.org/10.1038/s41550-021-01402-3}{DOI:10.1038/s41550-021-01402-3}})} + \begin{center} + \includegraphics[width=0.8\linewidth]{img/nature-astronomy.png} + \end{center} + + \vspace{-2mm} + \footnotesize Free-to-read link: \textcolor{blue}{\url{https://rdcu.be/cmYVx}} + \end{frame} + + + + + + \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}} + \vspace{-5mm} + \begin{columns}[t] + \column{0.5\linewidth} + \begin{center} + \large\textbf{Replicability (hardware/statistical)} + \rule{0.5\linewidth}{1pt} + \end{center} + \begin{itemize} + \setlength\itemsep{0.5em} + \item Involves data \alert{collection}. + \item Inherently includes \alert{measurement errors}\\(can + never be exactly reproduced). + \item Example: Raw telescope image/spectra. + \item \alert{\textbf{NOT DISCUSSED HERE.}} + \end{itemize} + + \vspace{3.5mm} + \begin{center} + \vspace{-5mm} + \includegraphics[width=0.7\linewidth]{img/hale-prime-focus.jpg}\\ + \vspace{-0.6mm} + \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk} + \end{center} + + \column{0.5\linewidth} + \end{columns} + \end{frame} + \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}} + \vspace{-5mm} + \begin{columns}[t] + \column{0.5\linewidth} + \begin{center} + \large\textbf{Replicability (hardware/statistical)} + \rule{0.5\linewidth}{1pt} + \end{center} + \begin{itemize} + \setlength\itemsep{0.5em} + \item Involves data \alert{collection}. + \item Inherently includes \alert{measurement errors}\\(can + never be exactly reproduced). 
+ \item Example: Raw telescope image/spectra. + \item \alert{\textbf{NOT DISCUSSED HERE.}} + \end{itemize} + + \vspace{3.5mm} + \begin{center} + \vspace{-5mm} + \includegraphics[width=0.7\linewidth]{img/hale-prime-focus-marked.jpg}\\ + \vspace{-0.6mm} + \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk} + \end{center} + + \column{0.5\linewidth} + \end{columns} + \end{frame} + \begin{frame}[t]{Definitions \& Clarification \hspace{1.6cm} {\normalsize(from the National Academies report in 2019, \href{http://doi.org/10.17226/25303}{DOI:10.17226/25303})}} + \vspace{-5mm} + \begin{columns}[t] + \column{0.5\linewidth} + \begin{center} + \large\textbf{Replicability (hardware/statistical)} + \rule{0.5\linewidth}{1pt} + \end{center} + \begin{itemize} + \setlength\itemsep{0.5em} + \item Involves data \alert{collection}. + \item Inherently includes \alert{measurement errors}\\(can + never be exactly reproduced). + \item Example: Raw telescope image/spectra. + \item \alert{\textbf{NOT DISCUSSED HERE.}} + \end{itemize} + + \vspace{3.5mm} + \begin{center} + \vspace{-5mm} + \includegraphics[width=0.7\linewidth]{img/hale-prime-focus.jpg}\\ + \vspace{-0.6mm} + \tiny \href{http://slittlefair.staff.shef.ac.uk/teaching/phy217/lectures/telescopes/L07/index.html}{http://slittlefair.staff.shef.ac.uk} + \end{center} + + \column{0.5\linewidth} + \begin{center} + \large\textbf{Reproducibility (Software/Deterministic)} + \rule{0.5\linewidth}{1pt} + \end{center} + \begin{itemize} + \setlength\itemsep{1em} + \item Involves data \alert{analysis}, or simulations. + \item Starts \alert{after} data is collected/digitized. + \item Example: $2+2=4$ (i.e., sum of datasets). 
+ \item \textbf{\textcolor{green!50!black}{DISCUSSED HERE.}} + \end{itemize} + + \centering + \vspace{3mm} + \includegraphics[width=0.8\linewidth]{img/digital-tunnel.jpg}\\ + \vspace{-0.6mm} + \tiny \href{https://tsongas.com/newsletter_articles/the-new-electronic-version-of-the-advantage/digital-tunnel-wallpaper/}{https://tsongas.com} + \end{columns} + \end{frame} + + + + + \begin{frame}{Founding criteria} \begin{tcolorbox}[title=Basic/simple principle:] \centering Science is defined by its METHOD, \alert{not} its @@ -325,24 +627,6 @@ - \begin{frame}{Example: Matplotlib (a Python visualization library) build dependencies} - \Wider[5em]{ - %\vspace{5mm} - \begin{center} - \includegraphics[width=0.9\linewidth]{img/matplotlib.png} - \end{center} - - \vspace{3mm}\tiny From ``Attributing and Referencing (Research) - Software: Best Practices and Outlook from Inria'' (Alliez et - al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}). - } - \end{frame} - - - - - - \begin{frame}{Advantages of this build system} \begin{columns} @@ -832,15 +1116,20 @@ \alert{negligible} compared to a single figure in a paper (usually $\sim100$ kilo-bytes). - \vspace{8mm} The project's pipeline (customized Maneage) can be + \vspace{6mm} The project's pipeline (customized Maneage) can be \alert{published} in \begin{itemize} \item \alert{arXiv}: uploaded with the \LaTeX{} source to always stay with the paper \\(for example - \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}} or \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}). 
+ \textcolor{blue}{\href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230}}, + \textcolor{blue}{\href{https://arxiv.org/abs/1911.01430}{arXiv:1911.01430}}, + \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}, + \textcolor{blue}{\href{https://arxiv.org/abs/2007.11779}{arXiv:2007.11779}},\\ + \textcolor{blue}{\href{https://arxiv.org/abs/2010.03742}{arXiv:2010.03742}}, + \textcolor{blue}{\href{https://arxiv.org/abs/2112.14174}{arXiv:2112.14174}}). \item \alert{Zenodo}: Along with all the input datasets (many Gigabytes) and software \\(for example - \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3872248}{zenodo.3872248}}) and given a unique DOI. + \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.6533902}{zenodo.6533902}}, also see comments in arXiv links above) and given a unique DOI. \item \alert{Software Heritage}: to archive the full version-controlled history of the project.\\(for example {\small \textcolor{blue}{\href{https://archive.softwareheritage.org/swh:1:dir:33fea87068c1612daf011f161b97787b9a0df39f;origin=http://git.maneage.org/paper-concept.git/;visit=swh:1:snp:89af43c4b076a17d9298299f224247038af355ea;anchor=swh:1:rev:313db0b04bd3499f83d9e79fd7e92578cd367c2b}{swh:1:dir:33fea87068c1612daf011f161b97787b9a0df39f}}}) \end{itemize} @@ -906,85 +1195,32 @@ - \begin{frame}{Achievements: RDA adoption grant (2019) to IAC for Maneage} - \begin{center} - \includegraphics[width=3cm]{img/rda.png}\hspace{1cm} - \includegraphics[width=1.8cm]{img/iac.png} - \includegraphics[width=\linewidth]{img/h2020.jpg} - \end{center} - - \vspace{1cm} For Maneage, the \alert{IAC} is selected as - a \alert{Top European organization} funded to adopt RDA - Recommendations and Outputs. - - \vspace{1cm} - \scriptsize - \begin{itemize} - \item Research Data Alliance was launched by the \alert{European - Commission}, NSF, National Institute of Standards and - Technology, and the Australian Government’s Department of - Innovation. 
- \item RDA Outputs are the technical and social infrastructure - solutions developed by RDA Working Groups or Interest - Groups that enable data sharing, exchange, and - interoperability. - \end{itemize} - - \vspace{0.2cm} - \end{frame} - - - - - - \begin{frame}{Achievements: ``News and Views'' in Nature Astronomy (\textcolor{blue}{\href{https://doi.org/10.1038/s41550-021-01402-3}{DOI:10.1038/s41550-021-01402-3}})} - \begin{center} - \includegraphics[width=0.8\linewidth]{img/nature-astronomy.png} - \end{center} - - \vspace{-2mm} - \footnotesize Free-to-read link: \textcolor{blue}{\url{https://rdcu.be/cmYVx}} - \end{frame} \begin{frame}{Summary:} - - Maneage and its principles are described in \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}. - It is a customizable template that will do the following steps/instructions (all in simple plain text files). + Maneage (\textcolor{blue}{\url{https://maneage.org}}) is a customizable template for research or data reduction that: \begin{itemize} - \item \alert{Automatically downloads} the necessary - \emph{software} and \emph{data}. - \item \alert{Builds} the software in a \alert{closed - environment}. - \item Runs the software on data to \alert{generate} the final - \alert{research results}. - \item Modification of part of the analysis will only - result in re-doing that part, not the whole project. - \item Using LaTeX macros, paper's figures, tables and numbers - will be \alert{Automatically updated} after a change in - analysis. Allowing the scientist to focus on the scientific - interpretation. - \item The whole project is under \alert{version control} (Git) - to allow easy reversion to a previous state. This - \alert{encourages tests/experimentation} in the analysis. - \item The \alert{Git commit hash} of the project source, is - \alert{printed} in the published paper and \alert{saved on - output} data products. Ensuring the - integrity/reproducibility of the result. 
- \item \colorbox{green!30!white}{These slides are available at - \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}.} - \item \colorbox{green!15!white}{Longer slides are available at - \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.} + \item \alert{Automatically downloads} the necessary \emph{software} and \emph{data}. + \item \alert{Builds} the software in a \alert{closed environment}. + \item Runs the software on data to \alert{generate} the final \alert{research results}. + \item Modification of part of the analysis will only result in re-doing that part, not the whole project. + \item Using LaTeX macros, paper's figures, tables and numbers will be \alert{Automatically updated}. + \item The whole project is under \alert{version control} (Git) to allow easy reversion to a previous state. This \alert{encourages tests/experimentation} in the analysis. + \item The \alert{Git commit hash} of the project source, is \alert{printed} in the published paper and \alert{saved on output} data products. Ensuring the integrity/reproducibility of the result. 
+ \item \colorbox{green!30!white}{These slides are available at \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}.} + \item \colorbox{green!15!white}{Longer slides are available at \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.} + \begin{itemize} + \item YouTube recording (May 2021): \textcolor{blue}{\url{https://www.youtube.com/watch?v=XdhRUhoMqw0}} + \end{itemize} + \item \colorbox{purple!20!white}{\small Matrix-protocol chat room: \texttt{\#maneage-general:matrix.org}} \end{itemize} \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt, top=1pt, bottom=1pt] - For a technical description of Maneage's implementation, as well - as a checklist to customize it, and tips on good practices, - please see this page: + For a technical description of Maneage's implementation, as well as a checklist to customize it, and tips on good practices, please see this page: \textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}} \end{tcolorbox} |