aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--img/data-lineage-1.pdfbin5924 -> 6847 bytes
-rw-r--r--img/data-lineage-10.pdfbin13879 -> 26746 bytes
-rw-r--r--img/data-lineage-11.pdfbin14062 -> 29143 bytes
-rw-r--r--img/data-lineage-12.pdfbin14392 -> 30659 bytes
-rw-r--r--img/data-lineage-13.pdfbin14591 -> 31739 bytes
-rw-r--r--img/data-lineage-14.pdfbin14698 -> 33643 bytes
-rw-r--r--img/data-lineage-15.pdfbin14867 -> 40754 bytes
-rw-r--r--img/data-lineage-16.pdfbin15016 -> 0 bytes
-rw-r--r--img/data-lineage-17.pdfbin15061 -> 0 bytes
-rw-r--r--img/data-lineage-18.pdfbin15213 -> 0 bytes
-rw-r--r--img/data-lineage-19.pdfbin15593 -> 0 bytes
-rw-r--r--img/data-lineage-2.pdfbin6558 -> 8505 bytes
-rw-r--r--img/data-lineage-20.pdfbin15755 -> 0 bytes
-rw-r--r--img/data-lineage-21.pdfbin16009 -> 0 bytes
-rw-r--r--img/data-lineage-22.pdfbin16191 -> 0 bytes
-rw-r--r--img/data-lineage-3.pdfbin7353 -> 11884 bytes
-rw-r--r--img/data-lineage-4.pdfbin7627 -> 13709 bytes
-rw-r--r--img/data-lineage-5.pdfbin7804 -> 14821 bytes
-rw-r--r--img/data-lineage-6.pdfbin12608 -> 20722 bytes
-rw-r--r--img/data-lineage-7.pdfbin12804 -> 21780 bytes
-rw-r--r--img/data-lineage-8.pdfbin13040 -> 23239 bytes
-rw-r--r--img/data-lineage-9.pdfbin13725 -> 25089 bytes
-rw-r--r--img/tools-per-year-orig.jpgbin0 -> 46204 bytes
-rw-r--r--img/tools-per-year.pdfbin0 -> 11960 bytes
-rw-r--r--slides-intro-short.tex298
-rw-r--r--slides-intro.tex63
-rw-r--r--tex/git-branch.tex2
27 files changed, 193 insertions, 170 deletions
diff --git a/img/data-lineage-1.pdf b/img/data-lineage-1.pdf
index ffa9df7..358f5e6 100644
--- a/img/data-lineage-1.pdf
+++ b/img/data-lineage-1.pdf
Binary files differ
diff --git a/img/data-lineage-10.pdf b/img/data-lineage-10.pdf
index 263b150..5b260da 100644
--- a/img/data-lineage-10.pdf
+++ b/img/data-lineage-10.pdf
Binary files differ
diff --git a/img/data-lineage-11.pdf b/img/data-lineage-11.pdf
index 913e31f..aae7615 100644
--- a/img/data-lineage-11.pdf
+++ b/img/data-lineage-11.pdf
Binary files differ
diff --git a/img/data-lineage-12.pdf b/img/data-lineage-12.pdf
index 9b29c45..0ecbd1d 100644
--- a/img/data-lineage-12.pdf
+++ b/img/data-lineage-12.pdf
Binary files differ
diff --git a/img/data-lineage-13.pdf b/img/data-lineage-13.pdf
index 65b8b02..07baeca 100644
--- a/img/data-lineage-13.pdf
+++ b/img/data-lineage-13.pdf
Binary files differ
diff --git a/img/data-lineage-14.pdf b/img/data-lineage-14.pdf
index 31bfa27..0c0c9e9 100644
--- a/img/data-lineage-14.pdf
+++ b/img/data-lineage-14.pdf
Binary files differ
diff --git a/img/data-lineage-15.pdf b/img/data-lineage-15.pdf
index 409cee4..f925185 100644
--- a/img/data-lineage-15.pdf
+++ b/img/data-lineage-15.pdf
Binary files differ
diff --git a/img/data-lineage-16.pdf b/img/data-lineage-16.pdf
deleted file mode 100644
index d924ec2..0000000
--- a/img/data-lineage-16.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-17.pdf b/img/data-lineage-17.pdf
deleted file mode 100644
index 5ac0675..0000000
--- a/img/data-lineage-17.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-18.pdf b/img/data-lineage-18.pdf
deleted file mode 100644
index a800a44..0000000
--- a/img/data-lineage-18.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-19.pdf b/img/data-lineage-19.pdf
deleted file mode 100644
index 7513caa..0000000
--- a/img/data-lineage-19.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-2.pdf b/img/data-lineage-2.pdf
index e338e82..d79231f 100644
--- a/img/data-lineage-2.pdf
+++ b/img/data-lineage-2.pdf
Binary files differ
diff --git a/img/data-lineage-20.pdf b/img/data-lineage-20.pdf
deleted file mode 100644
index 13ffab5..0000000
--- a/img/data-lineage-20.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-21.pdf b/img/data-lineage-21.pdf
deleted file mode 100644
index c7d4372..0000000
--- a/img/data-lineage-21.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-22.pdf b/img/data-lineage-22.pdf
deleted file mode 100644
index f2155aa..0000000
--- a/img/data-lineage-22.pdf
+++ /dev/null
Binary files differ
diff --git a/img/data-lineage-3.pdf b/img/data-lineage-3.pdf
index a8a5671..9d0e77b 100644
--- a/img/data-lineage-3.pdf
+++ b/img/data-lineage-3.pdf
Binary files differ
diff --git a/img/data-lineage-4.pdf b/img/data-lineage-4.pdf
index 6d686a7..1eb0255 100644
--- a/img/data-lineage-4.pdf
+++ b/img/data-lineage-4.pdf
Binary files differ
diff --git a/img/data-lineage-5.pdf b/img/data-lineage-5.pdf
index 3ae9baf..3a6298f 100644
--- a/img/data-lineage-5.pdf
+++ b/img/data-lineage-5.pdf
Binary files differ
diff --git a/img/data-lineage-6.pdf b/img/data-lineage-6.pdf
index 374de91..38e1731 100644
--- a/img/data-lineage-6.pdf
+++ b/img/data-lineage-6.pdf
Binary files differ
diff --git a/img/data-lineage-7.pdf b/img/data-lineage-7.pdf
index b590898..6dc481e 100644
--- a/img/data-lineage-7.pdf
+++ b/img/data-lineage-7.pdf
Binary files differ
diff --git a/img/data-lineage-8.pdf b/img/data-lineage-8.pdf
index 7dd6425..c1b6d09 100644
--- a/img/data-lineage-8.pdf
+++ b/img/data-lineage-8.pdf
Binary files differ
diff --git a/img/data-lineage-9.pdf b/img/data-lineage-9.pdf
index 29c6e5a..2211343 100644
--- a/img/data-lineage-9.pdf
+++ b/img/data-lineage-9.pdf
Binary files differ
diff --git a/img/tools-per-year-orig.jpg b/img/tools-per-year-orig.jpg
new file mode 100644
index 0000000..049f7d5
--- /dev/null
+++ b/img/tools-per-year-orig.jpg
Binary files differ
diff --git a/img/tools-per-year.pdf b/img/tools-per-year.pdf
new file mode 100644
index 0000000..8890226
--- /dev/null
+++ b/img/tools-per-year.pdf
Binary files differ
diff --git a/slides-intro-short.tex b/slides-intro-short.tex
index 88c14aa..79506ea 100644
--- a/slides-intro-short.tex
+++ b/slides-intro-short.tex
@@ -18,6 +18,9 @@
% Basic LaTeX settings.
\documentclass[9pt,usenames,dvipsnames,aspectratio=169]{beamer}
+% Make it super short.
+%\newcommand{\longformat}{}
+
% Read the current Git commit information
\include{git-commit}
\include{tex/preamble}
@@ -34,8 +37,8 @@
(\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}}
%% Set the title
-\title{{\huge\textbf{BIG} Data, \textbf{BIG} responsibility} \\
- {\LARGE Towards Long-term and Archivable Reproducibility} \\
+\title{Introducing Maneage:\\
+ Customizable framework for managing data lineage\\
\vspace{2mm}{\small [RDA Europe Adoption grant recipient. Submitted to \href{https://www.computer.org/csdl/magazine/cs}{IEEE CiSE} (\textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}), Comments welcome]}
}
@@ -89,6 +92,7 @@
\begin{frame}
\titlepage
\end{frame}
+ \usebackgroundtemplate{ } %% undeclare it
\begin{frame}{Challenges of the RDA-WDS Publishing Data Workflows WG {\small (DOI:\href{https://doi.org/10.1007/s00799-016-0178-2}{10.1007/s00799-016-0178-2})}}
@@ -105,7 +109,8 @@
\includegraphics[width=4cm]{img/wds.jpg}
\end{center}
- \pause
+ \ifdefined\longformat\pause\fi
+
``\emph{We would like to see a workflow that results in all
\textcolor{blue!30!green}{\bf scholarly objects being connected},
linked, citable, and persistent to allow researchers to navigate
@@ -118,13 +123,14 @@
discoveries.}''
\end{frame}
-\usebackgroundtemplate{ } %% undeclare it
-
-
\newcommand{\allopacity}{1}
+ \ifdefined\longformat
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \fi
\newcommand{\paperinit}{}
+ \ifdefined\longformat
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
+ \fi
\newcommand{\sver}{}
\newcommand{\srep}{}
\newcommand{\dver}{}
@@ -189,7 +195,7 @@
result.
\end{tcolorbox}
- \pause
+ \ifdefined\longformat\pause\fi
\begin{itemize}
\item \textbf{Complete/self-contained:}
\begin{itemize}
@@ -199,25 +205,25 @@
\item Should be usable \alert{without internet} connection.
\end{itemize}
- \pause
+ \ifdefined\longformat\pause\fi
\item \textbf{Modularity:} Parts of the project should be \alert{re-usable} in other projects.
- \pause
+ \ifdefined\longformat\pause\fi
\item \textbf{Plain text:} Project's source should be in \alert{plain-text} \textcolor{gray}{(binary formats need special software)}
\begin{itemize}
\item This includes high-level analysis.
\item It is easily publishable (very low volume of $\times100$KB), archivable, and parse-able.
\item \alert{Version control} (e.g., with Git) can track project's history.
\end{itemize}
- \pause
+ \ifdefined\longformat\pause\fi
\item \textbf{Minimal complexity:} Occam’s razor: “Never posit pluralities without necessity”.
\begin{itemize}
\item Avoiding the \alert{fashionable} tool of the day: tomorrow another tool will take its place!
\item Easier \alert{learning curve}, also doesn't create a \alert{generational gap}.
\item Is \alert{compatible} and \alert{extensible}.
\end{itemize}
- \pause
+ \ifdefined\longformat\pause\fi
\item \textbf{Verifiable inputs and outputs:} Inputs and Outputs must be \alert{automatically verified}.
- \pause
+ \ifdefined\longformat\pause\fi
\item \textbf{Free and open source software:} \alert{Free software} is essential: non-free software is not configurable, not distributable, and dependent on non-free provider (which may discontinue it in N years).
\end{itemize}
\end{frame}
@@ -231,7 +237,7 @@
-
+ \ifdefined\longformat
\begin{frame}{Predefined/exact software tools}
\small
\begin{columns}
@@ -266,7 +272,7 @@
\includegraphics[width=\linewidth]{img/version.png}
\end{columns}
\end{frame}
-
+ \fi
@@ -310,30 +316,6 @@
- \begin{frame}{Controlled environment and build instructions}
- \small
- \begin{columns}
- \column{5.5cm}
- \includegraphics[width=0.9\linewidth]{img/env.png}
- \column{5.5cm}
- \includegraphics[width=0.9\linewidth]{img/build.png}
- \end{columns}
- \end{frame}
-
- \begin{frame}{Controlled environment and build instructions}
- \small
- \begin{columns}
- \column{5.5cm}
- \includegraphics[width=0.9\linewidth]{img/env-highlighted.png}
- \column{5.5cm}
- \includegraphics[width=0.9\linewidth]{img/build-highlighted.png}
- \end{columns}
- \end{frame}
-
-
-
-
-
\begin{frame}{Example: Matplotlib (a Python visualization library) build dependencies}
\Wider[5em]{
%\vspace{5mm}
@@ -343,8 +325,7 @@
\vspace{3mm}\tiny From ``Attributing and Referencing (Research)
Software: Best Practices and Outlook from Inria'' (Alliez et
- al. 2019,
- \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+ al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}).
}
\end{frame}
@@ -379,10 +360,14 @@
\end{frame}
+
+
+ \ifdefined\longformat
\begin{frame}{Software citation automatically generated in paper (including Astropy)}
\centering
\includegraphics[width=0.8\linewidth]{img/software-cite.jpg}
\end{frame}
+ \fi
\begin{frame}{Software citation automatically generated in paper (including Astropy)}
\centering
\includegraphics[width=0.8\linewidth]{img/software-cite-highlighted.jpg}
@@ -393,6 +378,7 @@
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
\let\focusonhardware\undefined
+ \ifdefined\longformat
\begin{frame}{Input data source and integrity is documented and checked}
\small
\begin{columns}
@@ -418,6 +404,7 @@
\includegraphics[width=\linewidth]{img/inputs.png}
\end{columns}
\end{frame}
+ \fi
\begin{frame}{Input data source and integrity is documented and checked}
\small
@@ -463,7 +450,7 @@
-
+ \ifdefined\longformat
\begin{frame}{Reproducible science: Maneage is managed through a Makefile}
\small
\begin{columns}
@@ -530,6 +517,7 @@
\includegraphics[width=\linewidth]{img/reproducible-makefile-highlighted-1.png}
\end{columns}
\end{frame}
+ \fi
\begin{frame}{Reproducible science: Maneage is managed through a Makefile}
\small
\begin{columns}
@@ -592,7 +580,7 @@
\begin{frame}{General outline of a project (after data collection)} \include{tex/project-graph} \end{frame}
\let\focusonpaper\undefined
-
+ \ifdefined\longformat
\begin{frame}{Values in final report/paper}
All analysis \alert{results} (numbers, plots, tables) written in
paper's PDF as \alert{\LaTeX{} macros}. They are thus
@@ -603,6 +591,7 @@
\vspace{0.4cm}
\includegraphics[width=\linewidth]{img/reproducible-latex.png}
\end{frame}
+ \fi
\begin{frame}{Values in final report/paper}
All analysis \alert{results} (numbers, plots, tables) written in
@@ -618,13 +607,14 @@
-
+ \ifdefined\longformat
\begin{frame}{Analysis step results/values concatenated into a single file.}
All \LaTeX{} macros come from a \alert{single file}.
\begin{center}
\includegraphics[width=0.6\linewidth]{img/reproducible-macros.png}
\end{center}
\end{frame}
+ \fi
\begin{frame}{Analysis step results/values concatenated into a single file.}
All \LaTeX{} macros come from a \alert{single file}.
\begin{center}
@@ -637,7 +627,7 @@
-
+ \ifdefined\longformat
\begin{frame}{Analysis results stored as \LaTeX{} macros}
The analysis scripts write/update the \LaTeX{} macro values
automatically.
@@ -645,6 +635,7 @@
\includegraphics[width=0.6\linewidth]{img/reproducible-write-macro.png}
\end{center}
\end{frame}
+ \fi
\begin{frame}{Analysis results stored as \LaTeX{} macros}
The analysis scripts write/update the \LaTeX{} macro values
automatically.
@@ -654,12 +645,35 @@
\end{frame}
+
%% Make demo.
- \begin{frame}
- \LARGE
- \vspace{1cm}
- \hfill Let's see how the analysis is managed in a hypothetical project...
+ \begin{frame}{Let's look at the data lineage to replicate Figure 1C (green/tool) of Menke+2020 \\(DOI:\href{https://doi.org/10.1101/2020.01.15.908111}{10.1101/2020.01.15.908111}), as done in \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}} for a demo.}
+ \begin{columns}
+ \column{0.55\linewidth}
+ \textcolor{blue}{ORIGINAL PLOT}
+
+ The Green plot shows the fraction of papers mentioning software tools from 1997 to 2019.
+ \column{0.45\linewidth}
+ \includegraphics[width=\linewidth]{img/tools-per-year-orig.jpg}
+ \end{columns}
+
+ \rule{\textwidth}{1pt}
+
+ \begin{columns}
+ \column{0.4\linewidth}
+ \textcolor{green!70!black}{OUR enhanced REPLICATION}
+
+ The green line is the same as above but over their full historical range.
+
+ The red histogram is the number of papers studied in each year.
+ \column{0.6\linewidth}
+ \vspace{1cm}
+ \includegraphics[width=\linewidth]{img/tools-per-year.pdf}
+ \end{columns}
\end{frame}
+
+
+ \ifdefined\longformat
\makedemoslide{img/data-lineage-1.pdf}
{Makefiles (\texttt{*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
\makedemoslide{img/data-lineage-2.pdf}
@@ -673,37 +687,31 @@
\makedemoslide{img/data-lineage-6.pdf}
{Basic project info comes from \texttt{initialize.tex}.}
\makedemoslide{img/data-lineage-7.pdf}
- {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+ {The paper includes some information about the plot.}
\makedemoslide{img/data-lineage-8.pdf}
- {... for example the number of rows in the second input (a catalog) of the project.}
+ {The final plotted data are calculated and stored in \texttt{tools-per-year.txt}.}
\makedemoslide{img/data-lineage-9.pdf}
- {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
+ {The plot's calculation is done on a formatted sub-set of the raw input data.}
\makedemoslide{img/data-lineage-10.pdf}
- {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
+ {The raw data that were downloaded are stored in XLSX format.}
\makedemoslide{img/data-lineage-11.pdf}
- {... for example the average of the numbers in \texttt{out-1b.dat}.}
+ {The download URL \emph{and} a \alert{checksum to validate} the raw inputs, are stored in \texttt{INPUTS.conf}.}
\makedemoslide{img/data-lineage-12.pdf}
- {But \texttt{out-1b.dat} itself depends on other files and a paramter (for example a multiple of sigma).}
+ {We also need to report the URL in the paper...}
\makedemoslide{img/data-lineage-13.pdf}
- {\texttt{out-1a.dat} is built from a downloaded dataset.}
+ {Some general info about the full dataset may also be reported.}
+ \fi
+
+ \ifdefined\longformat
+ \makedemoslide{img/data-lineage-14.pdf}
+ {We report the number of papers studied in a special year; the desired year is stored in a \texttt{.conf} file.}
+ \else
\makedemoslide{img/data-lineage-14.pdf}
- {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
+ {All analysis steps cascade down to paper.pdf (URL and checksum of input in \texttt{INPUTS.conf}).}
+ \fi
+
\makedemoslide{img/data-lineage-15.pdf}
- {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
- \makedemoslide{img/data-lineage-16.pdf}
- {... for example the number of selected rows in \texttt{out-2b.dat}.}
- \makedemoslide{img/data-lineage-17.pdf}
- {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
- \makedemoslide{img/data-lineage-18.pdf}
- {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
- \makedemoslide{img/data-lineage-19.pdf}
- {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
- \makedemoslide{img/data-lineage-20.pdf}
- {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
- \makedemoslide{img/data-lineage-21.pdf}
- {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
- \makedemoslide{img/data-lineage-22.pdf}
- {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis with needs one parameter.}
+ {It is very easy to expand the project and add new analysis steps (this solution is scalable)}
@@ -742,7 +750,7 @@
-
+ \ifdefined\longformat
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
\newcommand{\projinit}{}
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
@@ -757,8 +765,18 @@
\newcommand{\githappy}{}
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
\newcommand{\gitverified}{}
+ \else
+ \newcommand{\projinit}{}
+ \newcommand{\projwork}{}
+ \newcommand{\tempevolve}{}
+ \newcommand{\mergewithtemp}{}
+ \newcommand{\tofuture}{}
+ \newcommand{\githappy}{}
+ \newcommand{\gitverified}{}
+ \fi
\begin{frame}{New projects branch from Maneage} \include{tex/git-branch} \end{frame}
+ \ifdefined\longformat
\begin{frame}{Two recent examples (publishing Git checksum in abstract)}
\begin{columns}
\column{0.5\linewidth}
@@ -769,6 +787,7 @@
\includegraphics[width=0.8\linewidth]{img/firstpage-mnras491.png}
\end{columns}
\end{frame}
+ \fi
\begin{frame}{Two recent examples (publishing Git checksum in abstract)}
\begin{columns}
@@ -804,12 +823,10 @@
\begin{itemize}
\item \alert{arXiv}: uploaded with the \LaTeX{} source to always
stay with the paper \\(for example
- \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}). The
- file containing all macros must also be uploaded so arXiv's
- server can easily build the \LaTeX{} source.
+ \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}} or \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}).
\item \alert{Zenodo}: Along with all the input datasets (many
Gigabytes) and software \\(for example
- \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}}) and given a unique DOI.
+ \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.3872248}{zenodo.3872248}}) and given a unique DOI.
\end{itemize}
\end{frame}
@@ -817,39 +834,36 @@
- \begin{frame}{Project source and its execution}
- \begin{tcolorbox}
- Programs \textcolor{gray}{[here: Scientific projects]} must be
- written for \alert{people to read}...
-
- \hfill ...and only \emph{incidentally} for machines to
- \emph{execute}.
-
- \vspace{2mm}
- \hfill \footnotesize Harold Abelson, Structure and Interpretation of Computer Programs
- \end{tcolorbox}
- \end{frame}
-
-
-
-
-
- \begin{frame}[t]{General outline of using this system (for example \href{https://arxiv.org/abs/1909.11230}{arXiv:1909.11230})}
+ \begin{frame}[t]{General outline of using Maneage (for example \href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018})}
\vspace{1cm}
- \texttt{\$ git clone http://gitlab.com/makhlaghi/iau-symposium-355{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\
+ \texttt{\$ git clone https://gitlab.com/makhlaghi/maneage-paper{ }{ }{ }{ }\textcolor{gray}{\# Import the project.}}\\
- \pause
+ \ifdefined\longformat\pause\fi
\vspace{1.5cm}
\texttt{\$ ./project configure { }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# You will specify the build directory on your system,}}\\
\texttt{{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# and it will build all software (about 1.5 hours).}}
- \pause
+ \ifdefined\longformat\pause\fi
\vspace{1.5cm}
\texttt{\$ ./project make { }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }{ }\textcolor{gray}{\# Does all the analysis and makes final PDF.}}\\
\end{frame}
+ \ifdefined\longformat
+ \begin{frame}{Project source and its execution}
+ \begin{tcolorbox}
+ Programs \textcolor{gray}{[here: Scientific projects]} must be
+ written for \alert{people to read}...
+
+ \hfill ...and only \emph{incidentally} for machines to
+ \emph{execute}.
+
+ \vspace{2mm}
+ \hfill \footnotesize Harold Abelson, Structure and Interpretation of Computer Programs
+ \end{tcolorbox}
+ \end{frame}
+ \fi
@@ -887,6 +901,52 @@
\end{itemize}
\end{frame}
+
+
+
+
+ \begin{frame}{Summary:}
+
+ Maneage and its principles are described in \textcolor{blue}{\href{https://arxiv.org/abs/2006.03018}{arXiv:2006.03018}}.
+ It is a customizable template that will do the following steps/instructions (all in simple plain text files).
+ \begin{itemize}
+ \item \alert{Automatically downloads} the necessary
+ \emph{software} and \emph{data}.
+ \item \alert{Builds} the software in a \alert{closed
+ environment}.
+ \item Runs the software on data to \alert{generate} the final
+ \alert{research results}.
+ \item Modification of part of the analysis will only
+ result in re-doing that part, not the whole project.
+ \item Using LaTeX macros, paper's figures, tables and numbers
+ will be \alert{Automatically updated} after a change in
+ analysis. Allowing the scientist to focus on the scientific
+ interpretation.
+ \item The whole project is under \alert{version control} (Git)
+ to allow easy reversion to a previous state. This
+ \alert{encourages tests/experimentation} in the analysis.
+ \item The \alert{Git commit hash} of the project source, is
+ \alert{printed} in the published paper and \alert{saved on
+ output} data products. Ensuring the
+ integrity/reproducibility of the result.
+ \item \colorbox{green!30!white}{These slides are available at
+ \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro-short.pdf}}.}
+ \item \colorbox{green!15!white}{Longer slides are available at
+ \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.}
+ \end{itemize}
+
+ \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
+ top=1pt, bottom=1pt]
+ For a technical description of Maneage's implementation, as well
+ as a checklist to customize it, and tips on good practices,
+ please see this page:
+
+ \textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
+ \end{tcolorbox}
+ \end{frame}
+
+
+\ifdefined\longformat
\begin{frame}{Existing technologies (Independent environment)}
\begin{itemize}
\setlength\itemsep{7mm}
@@ -910,13 +970,11 @@
\vspace{3mm}
In summary, they only \alert{store a built} environment (they are outputs, not good for archiving).
-
\end{frame}
-
\begin{frame}{Existing technologies (Package managers)}
\begin{itemize}
@@ -957,45 +1015,5 @@ In summary, they only \alert{store a built} environment (they are outputs, not g
\end{itemize}
\alert{Summary}: except for IPOL, most solutions surveyed have far too many dependencies to be usable \alert{beyond the immediate future}.
\end{frame}
-
-
-
-
- \begin{frame}{Summary:}
-
- Maneage is introduced as a customizable template that will do the
- following steps/instructions (all in simple plain text files).
- \begin{itemize}
- \item \alert{Automatically downloads} the necessary
- \emph{software} and \emph{data}.
- \item \alert{Builds} the software in a \alert{closed
- environment}.
- \item Runs the software on data to \alert{generate} the final
- \alert{research results}.
- \item A modification in one part of the analysis will only
- result in re-doing that part, not the whole project.
- \item Using LaTeX macros, paper's figures, tables and numbers
- will be \alert{Automatically updated} after a change in
- analysis. Allowing the scientist to focus on the scientific
- interpretation.
- \item The whole project is under \alert{version control} (Git)
- to allow easy reversion to a previous state. This
- \alert{encourages tests/experimentation} in the analysis.
- \item The \alert{Git commit hash} of the project source, is
- \alert{printed} in the published paper and \alert{saved on
- output} data products. Ensuring the
- integrity/reproducibility of the result.
- \item \colorbox{green!30!white}{These slides are available at
- \textcolor{blue}{\url{https://maneage.org/pdf/slides-intro.pdf}}.}
- \end{itemize}
-
- \begin{tcolorbox}[width=\linewidth, boxsep=1pt, left=1pt, right=1pt,
- top=1pt, bottom=1pt]
- For a technical description of Maneage's implementation, as well
- as a checklist to customize it, and tips on good practices,
- please see this page:
-
- \textcolor{blue}{\footnotesize\url{https://gitlab.com/maneage/project/-/blob/maneage/README-hacking.md}}
- \end{tcolorbox}
- \end{frame}
+\fi
\end{document}
diff --git a/slides-intro.tex b/slides-intro.tex
index f4fcf97..3ffedf9 100644
--- a/slides-intro.tex
+++ b/slides-intro.tex
@@ -383,8 +383,7 @@ for computational reproducibility]
\vspace{3mm}\tiny From ``Attributing and Referencing (Research)
Software: Best Practices and Outlook from Inria'' (Alliez et
- al. 2019,
- \textcolor{blue}{\href{https://hal.archives-ouvertes.fr/hal-02135891}{hal-02135891}})
+ al. 2020, CiSE, DOI:\textcolor{blue}{\href{https://doi.org/10.1109/MCSE.2019.2949413}{10.1109/MCSE.2019.2949413}}).
}
\end{frame}
\begin{frame}{Impact of ``Dependency hell'' on native building in various hardware (CPU architectures)}
@@ -1048,10 +1047,29 @@ for computational reproducibility]
%% Make demo.
- \begin{frame}
- \LARGE
- \vspace{1cm}
- \hfill Let's see how the analysis is managed in a hypothetical project...
+ \begin{frame}{Let's look at the data lineage to replicate Figure 1C (green/tool) of Menke+2020 \\(DOI:\href{https://doi.org/10.1101/2020.01.15.908111}{10.1101/2020.01.15.908111})}
+ \begin{columns}
+ \column{0.55\linewidth}
+ \textcolor{blue}{ORIGINAL PLOT}
+
+ The Green plot shows the fraction of papers mentioning software tools from 1997 to 2019.
+ \column{0.45\linewidth}
+ \includegraphics[width=\linewidth]{img/tools-per-year-orig.jpg}
+ \end{columns}
+
+ \rule{\textwidth}{1pt}
+
+ \begin{columns}
+ \column{0.4\linewidth}
+ \textcolor{green!70!black}{OUR enhanced REPLICATION}
+
+ The green line is the same as above but over their full historical range.
+
+ The red histogram is the number of papers studied in each year.
+ \column{0.6\linewidth}
+ \vspace{1cm}
+ \includegraphics[width=\linewidth]{img/tools-per-year.pdf}
+ \end{columns}
\end{frame}
\makedemoslide{img/data-lineage-1.pdf}
{Makefiles (\texttt{*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
@@ -1066,37 +1084,24 @@ for computational reproducibility]
\makedemoslide{img/data-lineage-6.pdf}
{Basic project info comes from \texttt{initialize.tex}.}
\makedemoslide{img/data-lineage-7.pdf}
- {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+ {The paper includes some information about the plot.}
\makedemoslide{img/data-lineage-8.pdf}
- {... for example the number of rows in the second input (a catalog) of the project.}
+ {The final plotted data are calculated and stored in \texttt{tools-per-year.txt}.}
\makedemoslide{img/data-lineage-9.pdf}
- {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
+ {The plot's calculation is done on a formatted sub-set of the raw input data.}
\makedemoslide{img/data-lineage-10.pdf}
- {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
+ {The raw data that were downloaded are stored in XLSX format.}
\makedemoslide{img/data-lineage-11.pdf}
- {... for example the average of the numbers in \texttt{out-1b.dat}.}
+ {The download URL \emph{and} a \alert{checksum to validate} the raw inputs, are stored in \texttt{INPUTS.conf}.}
\makedemoslide{img/data-lineage-12.pdf}
- {But \texttt{out-1b.dat} itself depends on other files and a paramter (for example a multiple of sigma).}
+ {We also need to report the URL in the paper...}
\makedemoslide{img/data-lineage-13.pdf}
- {\texttt{out-1a.dat} is built from a downloaded dataset.}
+ {Some general info about the full dataset may also be reported.}
\makedemoslide{img/data-lineage-14.pdf}
- {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
+ {We report the number of papers studied in a special year; the desired year is stored in a \texttt{.conf} file.}
+
\makedemoslide{img/data-lineage-15.pdf}
- {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
- \makedemoslide{img/data-lineage-16.pdf}
- {... for example the number of selected rows in \texttt{out-2b.dat}.}
- \makedemoslide{img/data-lineage-17.pdf}
- {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
- \makedemoslide{img/data-lineage-18.pdf}
- {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
- \makedemoslide{img/data-lineage-19.pdf}
- {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
- \makedemoslide{img/data-lineage-20.pdf}
- {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
- \makedemoslide{img/data-lineage-21.pdf}
- {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
- \makedemoslide{img/data-lineage-22.pdf}
- {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis with needs one parameter.}
+ {It is very easy to expand the project and add new analysis steps (this solution is scalable)}
diff --git a/tex/git-branch.tex b/tex/git-branch.tex
index fa35e0d..ad5b2c9 100644
--- a/tex/git-branch.tex
+++ b/tex/git-branch.tex
@@ -153,7 +153,7 @@
\end{itemize}
\ifdefined\gitverified
- \vspace{3mm}
+ \vspace{-1mm}
\hfill\tiny ``Verified'' image from \href{https://www.vectorstock.com/royalty-free-vector/red-vintage-verified-stamp-retro-style-on-white-vector-22770076}{vectorstock.com}
\fi
\end{columns}