aboutsummaryrefslogtreecommitdiff
path: root/reproducible-paper.tex
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2019-12-15 03:21:15 +0000
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2019-12-15 03:28:03 +0000
commitace21fbfbcb4a3f47b2cf1f1ce4fce50e417b5d4 (patch)
tree75397701024b0a03daf95201c78abf733f18fa19 /reproducible-paper.tex
parente77717d10fb67d16d3c1b7e069a91ae651cfeb0f (diff)
Make-demo slides now start from the end and come to the start
The set of slides that show how the built files relate to each other through Makefiles was edited to first show the final target, and track it all the way to the raw configuration files. This is in-line with how I describe Make (that Make starts from the end). This great suggestion was made by Idafen Santana Pérez.
Diffstat (limited to 'reproducible-paper.tex')
-rw-r--r--reproducible-paper.tex146
1 files changed, 79 insertions, 67 deletions
diff --git a/reproducible-paper.tex b/reproducible-paper.tex
index 6c8cd71..7cee761 100644
--- a/reproducible-paper.tex
+++ b/reproducible-paper.tex
@@ -846,55 +846,7 @@
- %% Make demo.
- \makedemoslide{img/make-demo-1.pdf}
- {Makefiles (\texttt{\*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
- \makedemoslide{img/make-demo-2.pdf}
- {First built/output file (downloaded dataset), managed in \texttt{download.mk}.}
- \makedemoslide{img/make-demo-3.pdf}
- {Downloaded file is used to build analysis-1's first output.}
- \makedemoslide{img/make-demo-4.pdf}
- {Third built file is for analysis-3, also depends on
- a value in a configuration file.}
- \makedemoslide{img/make-demo-5.pdf}
- {First input is also used to build another file (using
- two configuration parameters).}
- \makedemoslide{img/make-demo-6.pdf}
- {\texttt{out-3b.dat} is automatically built after (depends on) \texttt{out-2a.dat}.}
- \makedemoslide{img/make-demo-7.pdf}
- {We now need a second input file, which is downloaded.}
- \makedemoslide{img/make-demo-8.pdf}
- {\texttt{out-1b.dat} depends on \texttt{out-1a.dat},
- \texttt{input2.dat} and \texttt{param-1.conf}.}
- \makedemoslide{img/make-demo-9.pdf}
- {Final hypothetical output file (of the analysis phase) is
- also built.}
- \begin{frame}{Benefits of using Make for storing data lineage}
- \Large
- \begin{itemize}
- \setlength\itemsep{5mm}
- \item Every output file's position in the analysis is formally defined.\\
- {\normalsize (edges between the nodes of the graph in the previous slide)}
- \vspace{3mm}
- \begin{itemize}
- \Large
- \setlength\itemsep{3mm}
- \item Make can \alert{parallelize} the analysis: \\Make knows
- which steps are independent and will run them at the same time.\\
- \item Make can \alert{automatically detect a change} and will
- re-do \emph{only} the affected steps.\\ {\normalsize (for
- example to change the multiple of sigma in a configuration
- file to see its effect)}
- \item Easily \alert{backtrace} any step (without needing to remember!).\\
- {\normalsize (very useful to find problems/improvements)}
- \end{itemize}
- \vspace{3mm} All will speed up your work, and encourage
- experimentation for a robust result.
- \item Make is \alert{available} on any system: many people are \alert{already familiar} with it.
- \item And again: it's \alert{all in plain text}!\\{\normalsize (doesn't take much space, easy to read, distribute, parse automatically, or archive)}
- \end{itemize}
- \end{frame}
@@ -979,33 +931,91 @@
\end{center}
\end{frame}
+
+ %% Make demo.
+ \begin{frame}
+ \LARGE
+ \vspace{1cm}
+ \hfill Let's see how all this is managed in a hypothetical project...
+ \end{frame}
+ \makedemoslide{img/make-demo-1.pdf}
+ {Makefiles (\texttt{\*.mk}) keep contextually separate parts of the project, all imported into \texttt{top-make.mk}}
+ \makedemoslide{img/make-demo-2.pdf}
+ {The ultimate purpose of the project is to produce a paper/report (in PDF).}
+ \makedemoslide{img/make-demo-3.pdf}
+ {The narrative description of the PDF and \LaTeX{} typography is kept in \texttt{paper.tex}.}
+ \makedemoslide{img/make-demo-4.pdf}
+ {The numbers of the PDF (blended into the narrative) come from \texttt{project.tex}.}
+ \makedemoslide{img/make-demo-5.pdf}
+ {Basic project info comes from \texttt{initialize.tex}.}
+ \makedemoslide{img/make-demo-6.pdf}
+ {Reported values about the downloaded inputs come from \texttt{download.tex}.}
+ \makedemoslide{img/make-demo-7.pdf}
+ {... for example the number of rows in the second input (a catalog) of the project.}
+ \makedemoslide{img/make-demo-8.pdf}
+ {The URL to download \texttt{input2.dat}, and a checksum to validate it, are stored in \texttt{INPUTS.conf}.}
\makedemoslide{img/make-demo-9.pdf}
- {Let's start from the end of the analysis phase (same graph as before).}
+ {Reported values from first analysis steps stored in \texttt{analysis1.tex}.}
\makedemoslide{img/make-demo-10.pdf}
- {Makefiles are also used to connect the analysis with the paper.}
+ {... for example the average of the numbers in \texttt{out-1b.dat}.}
\makedemoslide{img/make-demo-11.pdf}
- {Information from the raw inputs is stored in \texttt{download.tex}.}
+ {But \texttt{out-1b.dat} itself depends on other files and a parameter (for example a multiple of sigma).}
\makedemoslide{img/make-demo-12.pdf}
- {Values to report from the first analysis step are kept in
- \texttt{analysis1.tex}.}
+ {\texttt{out-1a.dat} is built from a downloaded dataset.}
\makedemoslide{img/make-demo-13.pdf}
- {Values to report from the second analysis step are kept in
- \texttt{analysis2.tex}.}
+ {Download URL and checksum of \texttt{input1.dat} also stored in \texttt{INPUTS.conf}.}
\makedemoslide{img/make-demo-14.pdf}
- {Values to report from the third analysis step are kept in
- \texttt{analysis3.tex}.}
+ {Reported values from second analysis steps stored in \texttt{analysis2.tex}.}
\makedemoslide{img/make-demo-15.pdf}
- {General project information are stored in \texttt{initialize.tex}.}
+ {... for example the number of selected rows in \texttt{out-2b.dat}.}
\makedemoslide{img/make-demo-16.pdf}
- {All the \LaTeX{} macros are loaded into the single
- \texttt{project.tex} (after the analysis is complete).}
- \makedemoslide{img/make-demo-17.pdf}
- {With the paper's \LaTeX{} source, \texttt{project.tex} is
- used to build the final PDF paper.}
+ {\texttt{out-2b.dat} is derived from \texttt{out-1b.dat} (for example, rejected some of \texttt{out-1b.dat}'s rows).}
\makedemoslide{img/make-demo-17.pdf}
- {Full data lineage (raw input $\leftrightarrow$ every
- number/paragraph and plot in paper's PDF) is recorded.}
+ {Reported values from third analysis steps stored in \texttt{analysis3.tex}.}
+ \makedemoslide{img/make-demo-18.pdf}
+ {... for example measurements from both \texttt{out-3a.dat} and \texttt{out-3b.dat}.}
+ \makedemoslide{img/make-demo-19.pdf}
+ {\texttt{out-3b.dat} is generated from an analysis on \texttt{out-2a.dat}.}
+ \makedemoslide{img/make-demo-20.pdf}
+ {But \texttt{out-2a.dat} itself is generated from \texttt{input1.dat} and an analysis which has two settings.}
+ \makedemoslide{img/make-demo-21.pdf}
+ {\texttt{out-3a.dat} also depends on \texttt{out-1a.dat} and an analysis which needs one parameter.}
+ \begin{frame}{\LARGE The whole project is a directed graph (codifying the data's lineage).}
+ \LARGE
+ \begin{itemize}
+ \setlength\itemsep{1cm}
+ \item Every \alert{file} (source or built) is a \alert{node} in the graph (connected to others).\\
+ {\large (The links/connections/dependencies between the nodes, defined by the Makefiles: \texttt{*.mk})}
+ \item There are two types of nodes/files:
+ \begin{itemize}
+ \LARGE
+ \setlength\itemsep{4mm}
+ \item \alert{Source} nodes (\texttt{*.conf} and \texttt{paper.tex}) only have an \alert{outward} link.
+ \item \alert{Built} files always have \alert{inward} \emph{and} {\normalsize (except \texttt{paper.pdf})} \alert{outward} link(s).
+ \end{itemize}
+ \item All built files ultimately originate from a \texttt{*.conf} file,\\
+ ... and ultimately conclude in \texttt{paper.pdf}.
+ \end{itemize}
+ \end{frame}
+ \begin{frame}{Benefits of using Make}
+ \Large
+ \begin{itemize}
+ \setlength\itemsep{4mm}
+ \item Make can \alert{parallelize} the analysis: \\Make knows
+ which steps are independent and will run them at the same time.\\
+ \item Make can \alert{automatically detect a change} and will
+ re-do \emph{only} the affected steps.\\ {\normalsize (for
+ example to change the multiple of sigma in a configuration
+ file to see its effect)}
+ \item Easily \alert{backtrace} any step (without needing to remember!).\\
+ {\normalsize (very useful to find problems/improvements)}
+ \item The above will speed up your work, and \alert{encourage experimentation} on methods.
+ \item Make is \alert{available} on any system: many people are \alert{already familiar} with it.
+ \item And again: it's \alert{all in plain text}!\\{\normalsize (doesn't take much space, easy to read, distribute, parse automatically, or archive)}
+ \item Recall that the project's \alert{software installation} was also managed in Make.
+ \end{itemize}
+ \end{frame}
@@ -1026,10 +1036,12 @@
\newcommand{\allopacity}{1}
- \begin{frame}{Everything in plain text (machine and human readable)}
+ \begin{frame}{All questions have an answer now (in
+ \alert{plain text}: human \& computer readable/archivable).}
\include{tex/project-graph} \end{frame}
\newcommand{\gitlogo}{}
- \begin{frame}{Everything in plain text (machine and human readable)}
+ \begin{frame}{All questions have an answer now (in
+ \alert{plain text}: so we can use Git to keep its history).}
\include{tex/project-graph}
\end{frame}