%% reproducible-paper.tex (blob 2abf09e30727c3337b71803ea600cedd00391abb)
%% Document class: a beamer presentation with a 9pt base font size.
%% NOTE(review): the body uses \href, \url, \textcolor and
%% \includegraphics without an explicit \usepackage in this preamble;
%% they are presumably provided by the beamer class itself -- confirm.
\documentclass[9pt]{beamer}


%% Beamer settings: use the predefined `frame number' footline template.
\setbeamertemplate{footline}[frame number]


%% Packages to import.
\usepackage{tcolorbox}          %For a color-box.
\usepackage{textcomp}           %For a copyright sign.


%% To simplify arXiv links.
%%
%% \arxivlink{<id>} typesets a parenthesized, footnote-sized, blue
%% hyperlink pointing to https://arxiv.org/abs/<id> and displayed as
%% `arXiv:<id>'. Example: \arxivlink{1505.01664}.
%%
%% The starred \newcommand* is used because an arXiv identifier never
%% contains a paragraph break, so a forgotten closing brace is reported
%% close to where it occurs. The extra brace group keeps \footnotesize
%% local to the link.
\newcommand*{\arxivlink}[1]{{\footnotesize
  (\textcolor{blue}{\href{https://arxiv.org/abs/#1}{arXiv:#1}})}}



%% Set the title shown by \titlepage.
\title{Reproducible scientific paper/project}


%% Set the author: name on the first line, then (2mm lower, in a
%% smaller font) the affiliation on two further lines.
\author{Mohammad Akhlaghi\\\vspace{2mm}\footnotesize Instituto de
  Astrof\'isica de Canarias ({\scriptsize IAC}),\\Tenerife, Spain
}


%% Set the date and institutional logos: the date field is used to
%% place the institute's logo (2cm wide) instead of a calendar date.
\date{\includegraphics[width=2cm]{img/iac.png}}










\begin{document}

  \begin{frame}
    \titlepage
  \end{frame}


  \begin{frame}{Necessity of (exactly) reproducible research}
    \begin{itemize}
      \setlength\itemsep{0.3cm}
    \item To be considered \alert{scientific}, any result has to be
      reproducible.
    \item The tsunami of data, fast internet, and high processing
      power have made it very easy to \alert{promptly arrive at a
        result}.
    \item But these factors have also greatly increased the
      \alert{complexity} of an analysis, making it impossible to
      exactly describe all steps in a published paper.
    \item Most scientific papers thus ignore the ``details'' (as they
      interpret it).
    \item But due to the complexity, even a small deviation from the
      exact result can be due to many different parts of the
      analysis. Hence, it is \alert{critical to exactly reproduce} a
      result.
    \item The software used, configuration file(s), and the order of
      steps taken, along with the input data, are necessary for
      reproducibility.
    \item \alert{A solution} is proposed here which, if adopted from
      the start, can greatly \alert{simplify a scientific research
        project} and \alert{allow full/exact reproducibility} once it
      is published.
    \end{itemize}
  \end{frame}



  \begin{frame}{Values in final report/paper}
    All necessary analysis/processing \alert{input} and \alert{output}
    values are written into the final report as \LaTeX{} macros. Shown
    here is a portion of the \textsf{NoiseChisel} paper and its source
    (\textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}).

    \vspace{1.2cm}
    \includegraphics[width=\linewidth]{img/reproducible-latex.png}
  \end{frame}

  \begin{frame}{Values in final report/paper}
    All necessary analysis/processing \alert{input} and \alert{output}
    values are written into the final report as \LaTeX{} macros. Shown
    here is a portion of the \textsf{NoiseChisel} paper and its source
    (\textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}).

    \vspace{1.2cm}
    \includegraphics[width=\linewidth]{img/reproducible-latex-highlighted.png}
  \end{frame}


  \begin{frame}{Values come from a single file}
    All the \LaTeX{} macros (processing inputs and outputs) come from
    a \alert{single file}. This file is the \alert{final product} of
    the analysis steps.

    \begin{center}
      \includegraphics[width=0.8\linewidth]{img/reproducible-macros.png}
    \end{center}
  \end{frame}



  \begin{frame}{Values come from a single file}
    All the \LaTeX{} macros (processing inputs and outputs) come from
    a \alert{single file}. This file is the \alert{final product} of
    the analysis steps.

    \begin{center}
      \includegraphics[width=0.8\linewidth]{img/reproducible-macros-highlighted.png}
    \end{center}
  \end{frame}


  \begin{frame}{Values written during analysis}
    Various steps of the analysis write the macro values as soon as
    they are calculated internally.

    \begin{center}
      \includegraphics[width=0.8\linewidth]{img/reproducible-write-macro.png}
    \end{center}
  \end{frame}


  \begin{frame}{Values written during analysis}
    Various steps of the analysis write the macro values as soon as
    they are calculated internally.

    \begin{center}
      \includegraphics[width=0.8\linewidth]{img/reproducible-write-macro-highlight.png}
    \end{center}
  \end{frame}


  \begin{frame}{Reproducible science: Template is managed through a Makefile}
    \small
    \begin{columns}
      \column{5.5cm}

      All steps (downloading and analysis) are managed by Makefiles
      (example from
      \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}):

      \begin{itemize}
        \setlength\itemsep{0.2cm}
      \item Unlike a script which always starts from the top, a
        Makefile \alert{starts from the end} and steps that don't
        change will be left untouched (not remade).
      \item A single \emph{rule} can \alert{manage any number of
        files}. See the examples here where \textsf{NoiseChisel} and
        \textsf{MakeCatalog} are run separately on \alert{$\sim20$
          files} (different filters/fields) with a single rule.
      \item Make can identify independent steps internally and do them
        in \alert{parallel}.
      \item Make was \alert{designed for complex problems} with
        thousands of files (all major Unix-like components), so it is
        highly evolved and efficient.
      \item Make is a very \alert{simple} and \alert{small} language,
        thus easy to learn with great and free documentation (for
        example
        \textcolor{blue}{\href{https://www.gnu.org/software/make/manual/}{GNU
            Make's manual}}, usable to learn all implementations).
      \end{itemize}

      \column{5.5cm}
      \includegraphics[width=\linewidth]{img/reproducible-makefile.png}
    \end{columns}
  \end{frame}


  \begin{frame}{Reproducing the result and report/paper}
    The two \alert{simple} and \alert{familiar} commands below are
    enough to exactly reproduce the results at any time.

    \begin{itemize}
    \item[] \texttt{\$ ./configure}
    \item[] \texttt{\$ make}
    \end{itemize}

    With \texttt{./configure}, you specify the local directories to
    use. All necessary \alert{software} is then \alert{downloaded}
    and installed there (independent of your OS or other projects).

    \vspace{0.3cm} With \texttt{make}, input \alert{data} from online
    archives (databases) are \alert{downloaded}, if not locally
    available, the processing is done, and the \LaTeX{} paper is built
    as a PDF (e.g., see
    \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}
    or
    \textcolor{blue}{\small\href{https://gitlab.com/makhlaghi/reproducible-paper-output/raw/master/paper.pdf}{template's
        output}}).

    \vspace{0.3cm} Enabling version control (e.g., with \alert{Git})
    encourages testing different ideas while not harming the
    initial/base result (thus encouraging \alert{creativity} and
    brainstorming during the project).

    \vspace{0.3cm} After publication, \alert{readers} can
    \alert{change} the input configurations and the numbers and
    figures of the reproduced paper will change accordingly. This
    encourages creativity and brainstorming after the project as well
    as sharing of (hard-earned) experience with the whole
    community.
  \end{frame}



  \begin{frame}{Publication of the project}

    A reproducible project using this template will have the following
    (\alert{plain text}) components:
    \begin{itemize}
    \item Makefiles.
    \item \LaTeX{} source files.
    \item Configuration files for software used in analysis.
    \item Scripts/programming files (e.g., Python, Shell, AWK, C).
    \end{itemize}
    The \alert{volume} of the project's source will thus be
    \alert{negligible} compared to a single figure in a paper
    (usually $\sim100$ kilobytes).

    \vspace{1cm} The project's pipeline (customized template) can be
    \alert{published} in
    \begin{itemize}
    \item \alert{arXiv}: uploaded with the \TeX{} source to always
      stay with the paper \\(for example
      \textcolor{blue}{\small\href{https://arxiv.org/abs/1505.01664}{arXiv:1505.01664}}). The
      file containing all macros must also be uploaded so arXiv's
      server can easily build the \LaTeX{} source.
    \item \alert{Zenodo}: along with all the input datasets (many
      gigabytes) and software \\(for example
      \textcolor{blue}{\small\href{https://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}}) and given a unique DOI.
    \end{itemize}

  \end{frame}



  \begin{frame}
    The template is ready to use at the link below:

    \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper}}

    \vspace{1.5cm} For a technical description of the template's
    implementation, as well as a checklist to customize it, and tips
    on good practices, please see this page:

    \textcolor{blue}{\footnotesize\url{https://gitlab.com/makhlaghi/reproducible-paper/blob/pipeline/README-hacking.md}}

    \vspace{1.5cm} For more on the necessity of reproducible research,
    please see:

    \textcolor{blue}{\footnotesize\url{http://akhlaghi.org/reproducible-science.html}}
  \end{frame}
\end{document}