From 2bfa3a043dcf394492a33bbcb16121dcb227b5ed Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sat, 2 May 2020 04:38:55 +0100 Subject: First implementation of style in IEEEtran style The paper is no longer using LuaLaTeX, but raw LaTeX (that saves a DVI), it is so much faster! Initially I had used LuaLaTeX to use special fonts to resemble the CODATA Data Science Journal, but all that overhead is no longer necessary. Therefore I also removed the MANY extra LaTeX packages we were importing. The paper builds and is able to construct one of its images (the git-branching figure) with only 7 packages beyond the minimal TeX/LaTeX installation. Also in terms of processing it is so much faster. The text is just temporary now, and mainly just a place holder. With the next commit, I'll fill it with proper text. --- paper.tex | 639 ++------ reproduce/analysis/make/paper.mk | 21 +- reproduce/software/config/TARGETS.conf | 7 +- reproduce/software/config/texlive-packages.conf | 10 +- tex/img/icon-collaboration.eps | 159 ++ tex/img/icon-collaboration.svg | 107 ++ tex/img/icon-complete.eps | 162 +++ tex/img/icon-complete.svg | 2 + tex/img/icon-processing.eps | 212 +++ tex/img/icon-processing.svg | 1 + tex/src/figure-branching.tex | 13 +- tex/src/preamble-pgfplots.tex | 22 +- tex/src/preamble-project.tex | 27 + tex/src/references.bib | 1772 +++++++++++++++++++++++ tex/src/references.tex | 1772 ----------------------- 15 files changed, 2602 insertions(+), 2324 deletions(-) create mode 100644 tex/img/icon-collaboration.eps create mode 100644 tex/img/icon-collaboration.svg create mode 100644 tex/img/icon-complete.eps create mode 100644 tex/img/icon-complete.svg create mode 100644 tex/img/icon-processing.eps create mode 100644 tex/img/icon-processing.svg create mode 100644 tex/src/preamble-project.tex create mode 100644 tex/src/references.bib delete mode 100644 tex/src/references.tex diff --git a/paper.tex b/paper.tex index 1f5c037..8d7bde9 100644 --- a/paper.tex +++ b/paper.tex 
@@ -1,4 +1,11 @@ -\documentclass[10.5pt]{article} +%% Main LaTeX source of project's paper, license is printed in the end. +% +%% Copyright (C) 2020 Mohammad Akhlaghi +%% Copyright (C) 2020 Raúl Infante-Sainz +%% Copyright (C) 2020 Boudewijn F. Roukema +%% Copyright (C) 2020 David Valls-Gabaud +%% Copyright (C) 2020 Roberto Baena-Gallé +\documentclass[journal]{IEEEtran} %% This is a convenience variable if you are using PGFPlots to build plots %% within LaTeX. If you want to import PDF files for figures directly, you @@ -14,44 +21,44 @@ %% only for discussion. %\newcommand{\highlightchanges}{} -%% Import the necessary preambles. -\input{tex/src/preamble-style.tex} +%% Import necessary packages \input{tex/build/macros/project.tex} +\input{tex/src/preamble-project.tex} \input{tex/src/preamble-pgfplots.tex} -\input{tex/src/preamble-biblatex.tex} + +%% Title and author names. +\title{Towards Long-term and Archivable Reproducibility} +\author{ + Mohammad~Akhlaghi, + Ra\'ul Infante-Sainz, + Boudewijn F. Roukema, + David Valls-Gabaud, + Roberto Baena-Gall\'e + \thanks{Manuscript received MM DD, YYYY; revised MM DD, YYYY.} +} + +%% The paper headers +\markboth{Computing in Science and Engineering, Vol. X, No. X, MM YYYY}% +{Akhlaghi \MakeLowercase{\textit{et al.}}: Towards Long-term and Archivable Reproducibility} -\title{Towards long-term archivable reproducibility} -\author{\large\mpregular \authoraffil{Mohammad Akhlaghi}{1,2,3}, - \large\mpregular \authoraffil{Ra\'ul Infante-Sainz}{1,2}, - \large\mpregular \authoraffil{Boudewijn F. 
Roukema}{4,3}, - \large\mpregular \authoraffil{David Valls-Gabaud}{5}, - \large\mpregular \authoraffil{Roberto Baena-Gall\'e}{1,2}\\ - { - \footnotesize\mplight - \textsuperscript{1} Instituto de Astrof\'isica de Canarias, Calle V\'ia L\'actea s/n, 38205 La Laguna, Tenerife, Spain.\\ - \textsuperscript{2} Departamento de Astrof\'isica, Universidad de La Laguna, Avenida Astrof\'isico Francisco S\'anchez s/n, 38200 La Laguna, Tenerife, Spain.\\ - \textsuperscript{3} Univ Lyon, Ens de Lyon, Univ Lyon1, CNRS, Centre de Recherche Astrophysique de Lyon UMR5574, F-69007, Lyon, France.\\ - \textsuperscript{4} Institute of Astronomy, Faculty of Physics, Astronomy and Informatics, Nicolaus Copernicus University, Grudziadzka 5, 87-100 Toru\'n, Poland.\\ - \textsuperscript{5} LERMA, CNRS, Observaoire de Paris, 61 Avenue de l'Observatoire, 75014 Paris, France.\\ - Corresponding author: Mohammad Akhlaghi - (\href{mailto:mohammad@akhlaghi.org}{\textcolor{black}{mohammad@akhlaghi.org}}) - }} -\date{} -\begin{document}%\layout -\thispagestyle{firstpage} +%% Start the paper. +\begin{document} + +% make the title area \maketitle -%% Abstract % max 250 words for CiSE -{\noindent\mpregular +% As a general rule, do not put math, special symbols or citations +% in the abstract or keywords. +\begin{abstract} %% CONTEXT Many reproducible workflow solutions have been proposed over recent decades. Most use the high-level technologies that were popular when they were created, providing an immediate solution that is unlikely to be sustainable in the long term. @@ -62,93 +69,44 @@ %% METHOD The criteria have been tested in several research publications and can be summarized as: completeness (no dependency beyond a POSIX-compatible operating system, no administrator privileges, no network connection and storage primarily in plain-text); modular design; linking analysis with narrative; temporal provenance; scalability; and free-and-open-source software. 
%% RESULTS - Through an implementation, called "Maneage" (managing+lineage), we find that storing the project in machine-actionable and human-readable plain-text, enables version-control, cheap archiving, automatic parsing to extract data provenance, and peer-reviewable verification. + Through an implementation, called ``Maneage'' (managing+lineage), we find that storing the project in machine-actionable and human-readable plain-text, enables version-control, cheap archiving, automatic parsing to extract data provenance, and peer-reviewable verification. Furthermore, we show that these criteria are not limited to long-term reproducibility but also provide immediate, fast short-term reproducibility. %% CONCLUSION We conclude that requiring longevity from solutions is realistic. We discuss the benefits of these criteria for scientific progress. +\end{abstract} - \horizontalline - - \noindent - {\mpbold Keywords:} Data Lineage, Data Provenance, Reproducibility, Scientific Pipelines, Workflows - -% \noindent -% {\mpbold Note to DSJ editors or referees:} The distributed source of this project (described in Section \ref{sec:publishing}) is available in this URL: \url{https://akhlaghi.org/dsj-paper-\projectversion.tar.gz} -} - -\horizontalline - - +% Note that keywords are not normally used for peerreview papers. +\begin{IEEEkeywords} +Data Lineage, Provenance, Reproducibility, Scientific Pipelines, Workflows +\end{IEEEkeywords} +% For peer review papers, you can put extra information on the cover +% page as needed: +% \ifCLASSOPTIONpeerreview +% \begin{center} \bfseries EDICS Category: 3-BBND \end{center} +% \fi +% +% For peerreview papers, this IEEEtran command inserts a page break and +% creates the second title. It will be ignored for other modes. 
+\IEEEpeerreviewmaketitle \section{Introduction} -\label{sec:introduction} - -The increasing volume and complexity of data analysis has been extraordinarily productive, giving rise to a new branch of ``Big Data'' in many fields of the sciences and industry. -However, given its inherent complexity, the results are barely useful alone, questions naturally arise on their lineage or provenance: -What inputs were used? -How were the configurations or training data chosen? -What operations were done on those inputs, how were the plots made? -See Figure \ref{fig:questions} for some similar questions, classified by their place in project. -\tonote{Johan: add some general references.} - -Due to the complexity of modern data analysis, a small deviation in the final result can be due to many different steps, which may be significant for its interpretation. -Integrity checks are a critical component of the scientific method, but are only possible with access to the data \emph{and} its lineage (workflows). -For example, \citet{smart18} describes how a 7-year old conflict in theoretical condensed matter physics was only identified after the relative codes were shared. -\citet{miller06} found a mistaken column flipping in a project's workflow, leading to the retraction of 5 papers in major journals, including \emph{Science}. -\citet{baggerly09} highlighted the inadequate narrative description of the analysis and showed the prevalence of simple errors in published results, ultimately calling their work ``\emph{forensic bioinformatics}''. -\citet{herndon14} and \citet[a self-correction]{horvath15} also reported similar situations and \citet{ziemann16} concluded that one-fifth of papers contain erroneous gene name conversions. 
-These are mostly from genomics and bioinformatics because publishing workflows is commonly practiced already (for example \href{https://www.myexperiment.org}{myexperiment.org}, \href{https://www.genepattern.org}{genepattern.org}, and \href{https://galaxyproject.org}{galaxy\-project.org}). -The status in other fields, without a culture of publishing workflows, is highly likely to be worse. -Nature is already a black box which we are trying hard to unlock. -Not being able to experiment on the methods of other researchers is a self-imposed back box over it. - -\begin{figure}[t] - \begin{center} - \includetikz{figure-project-outline} - \end{center} - \vspace{-17mm} - \caption{\label{fig:questions}Graph of a generic project's workflow (connected through arrows), highlighting the various issues/questions on each step. - The green boxes with sharp edges are inputs and the blue boxes with rounded corners are intermediate or final outputs. - The red boxes with dashed edges highlight the main questions at each respective stage. - The box covering software download and build phases shows some common tools software developers use for this phase, but a scientific project is clearly much more involved. - } -\end{figure} - -The completeness of a project's published lineage (usually within the ``Methods'' section) can be measured by the ability to reproduce the result. -Several studies have attempted to answer this with different levels of detail. -For example, \citet{allen18} found that roughly half of the papers in astrophysics do not even mention the names of any analysis software, while \citet{menke20} found this fraction has greatly improved in medical/biological field and is currently above $80\%$. -\citet{ioannidis2009} attempted to reproduce 18 published results by two independent groups, but fully succeeded in only 2 of them and partially in 6. 
-\citet{chang15} attempted to reproduce 67 papers in well-regarded Economics journals that published data and code: only 22 could be reproduced without contacting authors, and more than half could not be replicated at all. \tonote{DVG: even after contacting the authors?} -\citet{stodden18} attempted it in 204 scientific papers published in the journal \emph{Science} \emph{after} adoptiong a policy of publishing the data and code associated with the papers. -Even though the authors were contacted, the success rate was an abysmal $26\%$. -Overall, this problem is unambiguously assessed as being very serious in the community: \citet{baker16} surveyed 1574 researchers and found that only $3\%$ did not see a ``\emph{reproducibility crisis}''. - -Yet, this is not a new problem in the sciences: back in 2011, Elsevier conducted an ``\emph{Executable Paper Grand Challenge}'' \citep{gabriel11} and the proposed solutions were published in a special edition.\tonote{DVG: which were the results?} -Even before that, in an attempt to simulate research projects, \citet{ioannidis05} proved that ``\emph{most claimed research findings are false}''. -In the 1990s, \citet{buckheit1995, claerbout1992} described the same problem very eloquently and also provided some solutions they used.\tonote{DVG: more details here, one is left wondering ...} -Even earlier, through his famous quartet, \citet{anscombe73} qualitatively showed how distancing of researchers from the intricacies of algorithms/methods can lead to misinterpretation of the results. -One of the earliest such efforts we are aware of is the work of \citet{roberts69}, who discussed conventions in Fortran programming and documentation to help in publishing research codes. -While the situation has somewhat improved, all these papers still resonate strongly with the frustrations of today's scientists. 
- -To address the collective problem of preserving a project's data lineage as well as its software dependencies, we introduce Maneage (Maneage+Lineage), pronounced man-ee-ij or \textipa{[m{\ae}n}i{\textsci}d{\textyogh}], hosted at \url{http://maneage.org}. -A project using Maneage starts by branching from its main Git branch, allowing the authors to customize it: specifying the necessary software tools for that particular project, adding analysis steps and adding visualizations and a narrative based on the results. -In Section \ref{sec:principles} the founding principles behind Maneage are discussed. -Section \ref{sec:maneage} describes the internal structure of Maneage and Section \ref{sec:discussion} is a discussion on its benefits, caveats and future prospects. - - - - - - +% The very first letter is a 2 line initial drop letter followed +% by the rest of the first word in caps. +\IEEEPARstart{T}{his} demo file is intended to serve as a ``starter file'' +for IEEE journal papers produced under \LaTeX\ using +IEEEtran.cls version 1.8b and later. +% You must have at least 2 lines in the paragraph with the drop letter +% (should never be an issue) +Here is an example citation \cite{akhlaghi19}. @@ -157,7 +115,7 @@ Section \ref{sec:maneage} describes the internal structure of Maneage and Sectio \label{sec:principles} The core principle of Maneage is simple: science is defined primarily by its method, not its result. -As \citet{buckheit1995} describe it, modern scientific papers are merely advertisements of scholarship, while the actual scholarship is the coding behind the plots/results. +As \cite{buckheit1995} describe it, modern scientific papers are merely advertisements of scholarship, while the actual scholarship is the coding behind the plots/results. 
Many solutions have been proposed in the last decades, including (but not limited to) 1992: \href{https://sep.stanford.edu/doku.php?id=sep:research:reproducible}{RED}, 2003: \href{https://taverna.incubator.apache.org}{Apache Taverna}, @@ -172,7 +130,7 @@ Many solutions have been proposed in the last decades, including (but not limite To help in the comparison, the founding principles of Maneage are listed below. -\begin{enumerate}[label={\bf P\arabic*}] +\begin{enumerate}%[label={\bf P\arabic*] \item \label{principle:complete}\textbf{Completeness:} A project that is complete, or self-contained, (P1.1) has no dependency beyond the Port\-able Operating System (OS) Interface, or POSIX, or a minimal Unix-like environment. @@ -182,12 +140,12 @@ To help in the comparison, the founding principles of Maneage are listed below. (P1.4) builds its software for an independent environment, (P1.5) can be run locally (without internet connection), (P1.6) contains the full project's analysis, visualization \emph{and} narrative, from access to raw inputs to producing final published format (e.g., PDF or HTML), - (P1.7) requires no manual/human interaction and can run automatically \citep[according to][``\emph{a clerk can do it}'']{claerbout1992}. + (P1.7) requires no manual/human interaction and can run automatically \cite[according to][``\emph{a clerk can do it}'']{claerbout1992}. \emph{Comparison with existing:} with many dependencies beyond POSIX, except for IPOL, none of the tools above are complete. For example, the workflow of most recent solutions need Python or Jupyter notebooks. Because of their complexity (see \ref{principle:complexity}), pre-built binary blobs like containers or virtual machines are the chosen storage format, which are large (Giga-bytes) and expensive to archive. - Furthermore, third-party package managers setup the environment, like Conda, or the OS's, like \inlinecode{apt} or \inlinecode{yum}. 
+ Furthermore, third-party package managers setup the environment, like Conda, or the OS's, like apt or yum. However, exact versions of \emph{every software} are rarely included, and the servers remove old binaries, hence blobs are hard to recreate. Blobs also have a short lifespan, e.g., Docker containers made today, may not be operable with future versions of Docker or Linux (currently Linux 3.2.x is the earliest supported version, released in 2012). In general they mostly aim for short-term reproducibility. @@ -208,14 +166,14 @@ However, designing a modular project needs to be encouraged and facilitated. Otherwise, scientists, who are not usually trained in data management, will rarely design a modular project, leading to great inefficiencies in terms of project cost and/or scientific accuracy (testing/validating will be expensive). \item \label{principle:complexity}\textbf{Minimal complexity:} - This is Ockham's razor extrapolated to project management \citep[``\emph{Never posit pluralities without necessity}''][]{schaffer15}: + This is Ockham's razor extrapolated to project management \cite[``\emph{Never posit pluralities without necessity}''][]{schaffer15}: 1) avoid complex relations between analysis steps (related to \ref{principle:modularity}). 2) avoid the programming language that is currently in vogue, because it is going to fall out of fashion soon and require significant resources to translate or rewrite it every few years (to stay fashionable). The same job can be done with more stable/basic tools, requiring less long-term effort. \emph{Comparison with existing:} IPOL stands out here too (requiring only ISO C), however most others are written in Python, and use Conda or Jupyter (see \ref{principle:complete}). Besides being incomplete (\ref{principle:complete}), these tools have short lifespans and evolve fast (e.g., Python 2 code cannot run with Python 3, causing disruption in many projects). 
- Their complex dependency trees also making them hard to maintain, for example, see the dependency tree of Matlplotlib in \citet[][Figure 1]{alliez19}, its one of the simpler Jupyter dependencies. + Their complex dependency trees also make them hard to maintain; for example, see the dependency tree of Matplotlib in \cite[][Figure 1]{alliez19}, which is one of the simpler Jupyter dependencies. The longevity of a workflow is determined by its shortest-lived dependency. \item \label{principle:verify}\textbf{Verifiable inputs and outputs:} @@ -243,316 +201,8 @@ Most of the more recent solutions above are scalable. However, IPOL, which uniquely stands out in satisfying most principles, fails here: IPOL is devoted to low-level image processing algorithms that \emph{can be} done with no dependencies beyond an ISO C compiler. IPOL is thus not scalable to large projects, which commonly involve dozens of high-level dependencies, with complex data formats and analysis. -\item \label{principle:freesoftware}\textbf{Free and open source software:} - Technically, reproducibility \citet{fineberg19} is possible with non-free or non-open-source software (a black box). - This principle is thus necessary to complement that definition (nature is already a black box, we don't need another one): - (1) As a free software, others can learn from, modify, and build upon it. - (2) The lineage can be traced to free software's implemented algorithms, also enabling optimizations on that level. - (3) A free-software package that does not execute on particular hardware can be modified to work on it. - (4) A non-free software project typically cannot be distributed by others, making the whole community reliant on the owner's server (even if the owner does not ask for payments). - - \emph{Comparison with existing:} The existing solutions listed above are all free software. - Based on this principle, we do not consider non-free solutions. 
-\end{enumerate} - - - - - - - - -\section{Maneage} -\label{sec:maneage} -Maneage is an implementation of the principles of Section \ref{sec:principles}. -In practice, Maneage is a collection of plain-text files that are distributed in pre-defined sub-directories by context (a modular source), and are all under version control, currently with Git. -The main Maneage Branch is a fully working skeleton of a project without much flesh: it contains all the low-level infrastructure, but without any actual high-level analysis operations. -Maneage contains a file called \inlinecode{README-hacking.md} (the \inlinecode{README.md} file is reserved for the project using Maneage, not Maneage itself) that has a complete checklist of steps to start a new project and remove demonstration parts. -There are also hands-on tutorials to help new users. - -To start a new project, the authors \emph{clone} Maneage, create a branch, and start their project by customizing it. -Thus, projects start with a good data management strategy rather than imposing it in the end, as recommended by \citet{fineberg19}. -Customization is done by adding the names of the necessary software, references to input data, analysis and visualization commands and writting a narrative description. -This will be done in multiple commits during the project (perhaps years), preserving the project's history: the descriptions of, and motivations for, changes or test failures and successes, as well as the authors and timestamps of each change. - -\begin{lstlisting}[language=bash] - git clone https://git.maneage.org/project.git # Clone Maneage, default branch `maneage'. - mv project my-project && cd my-project # Set custom name and enter directory. - git remote rename origin origin-maneage # Rename remote server to use `origin' later. - git checkout -b master # Make new `master' branch, start customizing. 
-\end{lstlisting} - -Maneage has two main phases: (1) configuration, where the necessary software is built and the environment is set up, and (2) analysis, where data are accessed and the software is run to create the final visualizations and report: - -\begin{lstlisting}[language=bash] - ./project configure # Build all necessary software from source. - ./project make # Do the analysis (download data, run software on data, build PDF). -\end{lstlisting} - -Section \ref{sec:usingmake} elaborates why Make was chosen as the main job manager. -Sections \ref{sec:projectconfigure} \& \ref{sec:projectanalysis} are on the operations done during the configuration and analysis phase. -The benefit from version control is described in Section \ref{sec:projectgit}. -Section \ref{sec:collaborating} discusses the sharing of a built environment, and finally, Section \ref{sec:publishing} is about the publication, or archival, of Maneage projects. - -\subsection{Job orchestration with Make} -\label{sec:usingmake} - -Scripts (e.g. shell, Perl, or Python) are an obvious solution for non-interactive (batch) processing. -However, the inherent complexity and non-linearity of progress as a project evolves makes it hard to manage scripts. -For example, if $90\%$ of a research project is done and only the final $10\%$ must be executed, a script will re-do the whole project. -Completed parts can be manually ignored (with conditionals), but this adds to the complexity and discourages experimentation on already completed parts. -These problems motivated the creation of Make in the early Unix OS \citep{feldman79}. -Make contiues to be a core component of modern OSs, is actively maintained, and has withstood the test of time. - -The Make paradigm starts from the end: the final \emph{target}. -In Make, the project is broken into atomic \emph{rules}, where each rule has a single \emph{target} file which can depend on any number of \emph{prerequisite} files. 
-To build the target from the prerequisites, each rule also has a \emph{recipe} (an atomic script). -The plain-text files containing Make source code are called Makefiles. -Make does not replace scripting languages like the shell, Python or R. -It is a higher-level structure enabling modular/atomic scripts (in any language) to be put into a workflow. - -Besides formalizing a project's data lineage, Make also greatly encourages experimentation in a project, because a recipe is executed only when at least one prerequisite file is more recent than its target. -For example, when only $5\%$ of a project's targets are affected by a change, the other $95\%$ remain dormant. -Furthermore, Make first examines the full lineage before starting the execution of recipes, and it can thus execute independent rules in parallel, improving speed and encouraging experimentation. - -Make is well known by many outside of software development communities. -For example, geophysics students have easily adopted it for the RED project management tool \citep{schwab2000}. -We also received very good feedback on the simplicity of using Make from early adopters of Maneage, especially graduate students and postdocs. - - - - - - -\subsection{Project configuration} -\label{sec:projectconfigure} - -Maneage organizes both the building of its software and the analysis pipeline using Make (see Section \ref{sec:usingmake}). -Thus, a researcher using Maneage for high-level analysis easily understands and can customize the software environment without needing to learn third-party tools. -The existing tools listed in Section \ref{sec:principles} mostly use package managers like Conda to maintain the software environment, but Conda itself is written in Python, contrary to our completeness principle \ref{principle:complete}. -Highly-robust solutions like Nix and GNU Guix exist, but these require root permissions, contrary to principle P1.3. 
- -Project configuration (building the software environment) is managed by the files under \inlinecode{reproduce\-/soft\-ware} of Maneage's source. -At the start of project configuration, Maneage needs a top-level directory to build itself on the host (software and analysis). -We call this the ``build directory'' and it must not be located inside the source directory (see \ref{principle:modularity}). -No other location on the running OS will be affected by the project, including the source directory. -Two other local directories can optionally be specified by the project when inputs are present locally: 1) software tarball directory and 2) input data directory. -Sections \ref{sec:buildsoftware} and \ref{sec:softwarecitation} detail the building of the required software and the important issue of software citation. - -\subsubsection{Verifying and building necessary software from source} -\label{sec:buildsoftware} - -To compile the necessary software from source, Maneage currently needs the host to have a C and C++ compiler (available on any POSIX-compliant OS). -Maneage will build and install (in the build directory) all necessary software and their dependencies, all with fixed versions and configurations. -The dependency tree continues down to core OS components including GNU Bash, GNU AWK, GNU Coreutils on all supported OSs. -On GNU/Linux OSs, a fixed version of the GNU Binutils and GNU C Compiler (GCC) is also built, soon a custom GNU C Library will also be included to be fully independent of the host (task 15390). - -Except for very low level components like the Kernel or filesystem, Maneage thus builds all other components necessary for the project. -Because there is no pure/raw POSIX OS, Maneage aims to run on existing POSIX-compatible OSs, failure to build on anyone of them is treated as a bug, which will be fixed. -It is currently being actively tested on GNU/Linux and macOS. 
-A Maneage project can be configured in a container or virtual machine to facilitate moving the project without rebuilding everything from source, or to use it on non-compatible OSs. -However, such binary blobs are not the primary storage/archival format of Maneage. - -Before building the software, their source codes are validated by their SHA-512 checksum (stored in the project). -Maneage includes a growing collection of scientific software (and its dependencies), much of which is superfluous for any single project. -Therefore, each project has to identify its high-level software in the \inlinecode{TARGETS.conf} file. - -\subsubsection{Software citation} -\label{sec:softwarecitation} - -Maneage contains the full list of software that were built for the project but this information is buried deep into the source. -Maneage prints a simplified description of this information in the project's final report, blended into the narrative, as in the Acknowledgments of this paper. -Furthermore, when the software is associated with a published paper, that paper's Bib\TeX{} entry is added to the final report and is duly cited with the software's name and version. -This paper uses basic software without associated scientific papers. For software citation examples, see \citet{akhlaghi19} and \citet{infante20}. - -This is particularly important for research software, where citation is critical to justify continued development. -A notable example is GNU Parallel \citep{tange18} which prints citation information each time it is run, proposing to either cite the paper or support it with 10000 euros. -It provides a \inlinecode{--citation} option to disable the notice. -In \href{https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt?h=master}{its FAQ} this is justified by ``\emph{If you feel the benefit from using GNU Parallel is too small to warrant a citation, then prove that by simply using another tool}''. 
-Most software does not resort to such drastic measures. However, proper citation is not only useful practically, it is also an ethical imperative. - -Given the increasing role of software in research \citep{clement19}, automatic citation, is a robust solution. -For a review of the necessity and basic elements of software citation, see \citet{katz14} and \citet{smith16}. -The CodeMeta and Citation file format (CFF) aim to expand software citation beyond Bib\TeX, while Software Heritage \citep{dicosmo18} also includes archival and citation abilities. -These will be tested and enabled in Maneage. - - - - - -\subsection{Project analysis} -\label{sec:projectanalysis} - -The analysis operations run with no influence from the host OS, enabling an isolated environment without the extra layer of containers or a virtual machine. -In Maneage, a project's analysis is broken into two phases: 1) preparation, and 2) analysis. -Both have an identical internal structure. -The preparation phase is usually only necessary for advanced users who need to optimize extremely large datasets. - -The analysis phase consists of many steps, including data access (possibly by downloading), running various steps of the analysis on the raw inputs, and creating the necessary figures or tables for a published report, or output datasets for a database. -If all of these steps were organized in a single Makefile, it would become very long, and would be hard to maintain, extend, read, reuse, and cite. -Large files are in general a bad practice and against the modularity and minimal complexity principles (\ref{principle:modularity} \& \ref{principle:complexity}). - -Maneage is thus designed to encourage and facilitate modularity by distributing the analysis into many Makefiles that contain contextually-similar analysis steps. -Hereafter, these lower-level Makefiles are termed \emph{subMakefiles}. 
-When run with the \inlinecode{make} argument, the \inlinecode{project} script (Section \ref{sec:maneage}), calls \inlinecode{top-make.mk}, which loads the subMakefiles using the \inlinecode{include} directive (see Section \ref{sec:analysis}). -All the analysis Makefiles are in \inlinecode{re\-produce\-/anal\-ysis\-/make}. Figure \ref{fig:datalineage} shows their relationship with the target/built files that they manage. -To keep the project's logic clear and simple (minimal complexity principle, \ref{principle:complexity}), recursion (where one instance of Make calls Make internally) is, by default, not used. - -\begin{figure}[t] - \begin{center} - \includetikz{figure-data-lineage} - \end{center} - \vspace{-7mm} - \caption{\label{fig:datalineage}Schematic representation of a project's data lineage, or workflow, for the demonstration analysis of this paper. - Each colored box is a file in the project and the arrows show the dependencies between them. - Green files/boxes are plain-text files that are under version control and in the source directory. - Blue files/boxes are output files in the build-directory, shown within the Makefile (\inlinecode{*.mk}) where they are defined as a \emph{target}. - For example, \inlinecode{paper.pdf} depends on \inlinecode{project.tex} (in the build directory; generated automatically) and \inlinecode{paper.tex} (in the source directory; written manually). - The solid arrows and full-opacity built boxes are described in Section \ref{sec:projectanalysis}. - The dashed arrows and low-opacity built boxes show the scalability by adding hypothetical steps to the project. - } -\end{figure} - -To avoid getting too abstract in the subsections below, where necessary we will do a basic analysis on the data of \citet{menke20} (hereafter M20) and replicate one of the results. 
-We cannot use the same software as M20, because M20 used Microsoft Excel for their analysis, violating several of our principles: \ref{principle:complete}, \ref{principle:complexity} and \ref{principle:freesoftware}. -Since we do not use the same software, this does not qualify as a reproduction \citep{fineberg19}. -In the subsections below, this paper's analysis on that dataset is described using the data lineage graph of Figure \ref{fig:datalineage}. -We will follow Make's paradigm (see Section \ref{sec:usingmake}) of starting the lineage backwards form the ultimate target in Section \ref{sec:paperpdf} (bottom of Figure \ref{fig:datalineage}) to the configuration files \ref{sec:configfiles} (top of Figure \ref{fig:datalineage}). -To better understand this project, we recommend study of this paper's own Maneage source, published as a supplement. - -\subsubsection{Ultimate target: the project's paper or report (\inlinecode{paper.pdf})} -\label{sec:paperpdf} - -The ultimate purpose of a project is to report the data analysis result, as raw visualizations, or numbers blended in with a narrative. -In Figure \ref{fig:datalineage}, this is \inlinecode{paper.pdf}, which is the only built file (blue box) with no outwards arrows leaving it. -The instructions to build \inlinecode{paper.pdf} are in the \inlinecode{paper.mk} subMakefile. -Its prerequisites include \inlinecode{paper.tex} and \inlinecode{references.tex} (Bib\TeX{} entries for possible citations) in the project source and \inlinecode{project.tex} which is a built product. -The high-level connections of this project with previous projects are formalized by \inlinecode{references.tex}. - -\subsubsection{Values within text (\inlinecode{project.tex})} -\label{sec:valuesintext} - -Figures, plots, tables, datasets, and/or narrative are not the only outputs of a project. 
-In many cases, quantitative values from the analysis are also blended into the sentences of the report's narration, or published with the dataset in a database. -An example is in the abstract of \citet[\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}, written in Maneage]{akhlaghi19}: ``\emph{... detect the outer wings of M51 down to S/N of 0.25 ...}''. -The value `0.25', for the signal-to-noise ratio (S/N), also depends on the analysis, and is thus also an output. -Manually typing such numbers in the narrative is prone to errors and discourages experimentation. - -To automatically generate and blend them in the text, Maneage uses \LaTeX{} macros. -For example, the \LaTeX{} source of the quote above is: ``\inlinecode{\small detect the outer wings of M51 down to S/N of \$\textbackslash{}demo\-sf\-optimized\-sn\$}''. -The ma\-cro ``\inlinecode{\small\textbackslash{}demosfoptimizedsn}'' is automatically created during the project. It expands to the value ``\inlinecode{0.25}'' when the PDF output is built. -All such values are referenced in \inlinecode{project.tex}. -However, managing them in a single file would violate the modularity principle, be hard to parallelize, frustrating to manage, and bug-prone. -All subMakefiles thus contain a fixed target with the same name but a different suffix: \inlinecode{.tex} instead of \inlinecode{.mk}, hosting values generated in that subMakefile. -Figure \ref{fig:datalineage} shows them as built products of every subMakefile, except for \inlinecode{paper.mk}. -These \LaTeX{} macro files form the core skeleton of a Maneage project: as shown in Figure \ref{fig:datalineage}, the outward arrows of all built files in any subMakefile ultimately lead to one of these \LaTeX{} macro files. 
- -\subsubsection{Verification of outputs (\inlinecode{verify.mk})} -\label{sec:outputverification} - -Before the modular \LaTeX{} macro files described above are merged into the single \inlinecode{project.tex} file, they need to pass through the verification filter, which implements another core principle of Maneage, \ref{principle:verify}. -Confirming the checksum of the final PDF or of figures and datasets is not generally useful because many tools write the creation date into the files. -To avoid this, the raw data must be verified independent of metadata like date. -Some standards include such date-independent verification features, for example, the \inlinecode{DATASUM} keyword in the FITS format \citep{pence10}. -To facilitate output verification, Maneage has the \inlinecode{verify.mk} subMakefile that separates the analytical phase of the paper from the production of the report (see Figure \ref{fig:datalineage}). -This file implements some tests on pre-defined formats. -Other formats can easily be added. - -\subsubsection{The analysis} -\label{sec:analysis} - -The analysis is demonstrated with the practical example of replicating Figure 1C of M20, with some enhancements, in Figure \ref{fig:toolsperyear}. -As shown in Figure \ref{fig:datalineage}, for this example we split this goal into two subMakefiles: \inlinecode{format.mk} and \inlinecode{demo-plot.mk}. -The former converts the Excel-formatted input into comma-separated value (CSV) format, and the latter generates the table to build Figure \ref{fig:toolsperyear}. -In a real project, subMakefiles could, and will, be much more complex. -Their location after the standard starting subMakefiles (initialization and download) and before the standard ending subMakefiles (verification and final paper) is important, along with their order. 
- -\begin{figure}[t] - \begin{center} - \includetikz{figure-tools-per-year} - \end{center} - \vspace{-5mm} - \caption{\label{fig:toolsperyear}Ratio of papers mentioning software tools (green line, left vertical axis) to total number of papers studied in that year (light red bars, right vertical axis in log-scale). - This is an enhanced replica of figure 1C \citet{menke20}, shown here for demonstrating Maneage, see Figure \ref{fig:datalineage} for its lineage and Section \ref{sec:analysis} for how it was organized. - } -\end{figure} - -To enhance the original M20 plot, Figure \ref{fig:toolsperyear} also shows the number of papers in each year and its horizontal axis shows the full range of the data (starting from \menkefirstyear), while M20 starts from 1997. -This was probably because the authors judged the earlier years' data to be too noisy. For example, in \menkenumpapersdemoyear, only \menkenumpapersdemocount{} papers were analysed. -Both the numbers in the previous sentence (\menkenumpapersdemoyear{} and \menkenumpapersdemocount), and the dataset's oldest year (mentioned above: \menkefirstyear) are automatically generated \LaTeX{} macros, see Section \ref{sec:valuesintext}. -These are \emph{not} typeset manually in this narrative explanation. -This step (generating the macros) is shown schematically in Figure \ref{fig:datalineage} with the arrow from \inlinecode{tools-per-year.txt} to \inlinecode{demo-plot.tex}. - -To create Figure \ref{fig:toolsperyear}, we used the PGFPlots package within \LaTeX{}. -Therefore, the necessary analysis output to feed into \LaTeX{} was a plain-text table with 3 columns (year, paper per year, tool fraction per year). -This table is shown in the lineage graph of Figure \ref{fig:datalineage} as \inlinecode{tools-per-year.txt} and The PGFPlots source to generate this figure is located in \inlinecode{tex\-/src\-/figure\--tools\--per\--year\-.tex}. 
-If another plotting tool was desired (for example Python's Matplotlib, or Gnuplot), the built graphic file (for example \inlinecode{tools-per-year.pdf}) would be the target instead. - -The file \inlinecode{tools-per-year.txt} is a value-added table with only \menkenumyears{} rows (one row for every year). -The original dataset had \menkenumorigrows{} rows (one row for each year of each journal). -We see in Figure \ref{fig:datalineage} that it is defined as a Make \emph{target} in \inlinecode{demo-plot.mk} and that its prerequisite is \inlinecode{menke20-table-3.txt} (schematically shown by the arrow connecting them). -Both the row counts mentioned at the start of this paragraph are again macros. -In Figure \ref{fig:datalineage}, we see that \inlinecode{menke20-table-3.txt} is a target in \inlinecode{format.mk} and its prerequisite is the input file \inlinecode{menke20.xlsx} (XLSX I/O is used for the conversion). -The input files (which come from outside the project) are all \emph{targets} in \inlinecode{download.mk} and futher discussed in Section \ref{sec:download}. - - - -\subsubsection{Importing and validating inputs (\inlinecode{download.mk})} -\label{sec:download} - -The \inlinecode{download.mk} subMakefile is present in all projects, containing common steps for importing the input dataset(s). -All necessary datasets are imported through this subMakefile, irrespective of where the dataset is \emph{used}. -The relation between the project and the outside world is maintained in this single subMakefile, aiming at modularity (\ref{principle:modularity}) minimal complexity (\ref{principle:complexity}) and internet security. - -Each external dataset has some basic information, including its expected name on the local system (for offline access), a checksum to validate it (either the whole file or just its main ``data'', as discussed in Section \ref{sec:outputverification}), and its URL/PID. -In Maneage, they are stored in the \inlinecode{INPUTS.conf} file. 
-See Figure \ref{fig:datalineage} for the position of \inlinecode{INPUTS.conf} in the project's file structure and data lineage, respectively. -Each is stored as a Make variable, and is automatically loaded into the full project when Make starts, like other configuration files, usable in any subMakefile. - - - -\subsubsection{Configuration files} -\label{sec:configfiles} - -The subMakefiles discussed above should only organize the analysis, they should not contain any fixed numbers, settings or parameters, which should instead be set as variables in configuration files. -Configuration files logically separate the low-level implementation from the high-level running of a project. -In the data lineage plot of Figure \ref{fig:datalineage}, configuration files are shown as sharp-edged, green \inlinecode{*.conf} boxes in the top row (for example, the file \inlinecode{INPUTS.conf} that was mentioned in Section \ref{sec:download}). -All the configuration files of a project are placed under the \inlinecode{reproduce/analysis/config} subdirectory, and are loaded into \inlinecode{top-make.mk} before any of the subMakefiles, hence they are available to all of them. - -The example analysis in Section \ref{sec:analysis}, in which we reported the number of papers studied by M20 in \menkenumpapersdemoyear, illustrates this. -The year ``\menkenumpapersdemoyear'' is not written by hand in \inlinecode{demo-plot.mk}. -It is referenced through the \inlinecode{menke-year-demo} variable, which is defined in \inlinecode{menke-demo-year.conf}, which is a prerequisite of the \inlinecode{demo\--plot\-.tex} rule, see it in Figure \ref{fig:datalineage}. -If we wished to report the number in a different year, it would be sufficient to change the value in \inlinecode{menke-demo-year.conf}. -A configuration file is a prerequisite of the target that uses it, so after the change, its timestamp will be newer than \inlinecode{demo-plot.tex}. 
-Thus, Make will re-execute the recipe to generate the macro file before this paper is re-built and the corresponding year and value will be updated in this paper, always in synchronization with each other and no matter how many times they are used. -Combined with the fact that all source files in Maneage are under version control, this encourages testing of various settings of the -analysis as the project evolves in the case of exploratory research papers, and better self-consistency in hypothesis testing papers. - -\subsubsection{Project initialization (\inlinecode{initialize.mk})} -\label{sec:initialize} - -The \inlinecode{initial\-ize\-.mk} subMakefile is present in all projects and is the first subMakefile that is loaded into \inlinecode{top-make.mk} (see Figures \ref{fig:datalineage}). -It does not contain any analysis or major processing steps, it just initializes the system by setting the necessary Make environment as well as other general jobs like defining the Git commit hash of the run as a \LaTeX{} (\inlinecode{\textbackslash{}projectversion}) macro that can be loaded into the narrative. -Papers using Maneage usually put this hash as the last word in their abstract, for example, see \citet{akhlaghi19} and \citet{infante20}. -For the current version of this paper, it expands to \projectversion. - -\subsection{Projects as Git branches of Maneage} -\label{sec:projectgit} - -Maneage projects are primarily stored as plain-text files. -It can thus be efficiently maintained under version control systems (currently using Git). -Every commit in the version-controlled history contains \emph{a complete} snapshot of the data lineage (see the completeness principle \ref{principle:complete}). -Maneage is maintained by its developers in a central branch, \inlinecode{man\-eage}. -The \inlinecode{man\-eage} branch contains all the low-level infrastructure, a skeleton, that is needed by any new project. 
-As shown in Section \ref{sec:maneage} new projects start by cloning \inlinecode{man\-eage} and customizing their own Git branch, or fork. -Figure \ref{fig:branching}(a) shows how a project has started by branching off commit \inlinecode{0c120cb}. - -%% Exact URLs of imported images. -%% Collaboration icon: https://www.flaticon.com/free-icon/collaboration_809522 -%% Paper done: https://www.flaticon.com/free-icon/file_2521838 -%% Paper processing: https://www.flaticon.com/free-icon/file_2521989 -\begin{figure}[t] - \includetikz{figure-branching} +\begin{figure*}[t] + \begin{center} \includetikz{figure-branching}\end{center} \vspace{-3mm} \caption{\label{fig:branching} Harvesting the power of version-control in project management with Maneage. Maneage is maintained as a core branch, with projects created by branching off it. @@ -562,89 +212,19 @@ Figure \ref{fig:branching}(a) shows how a project has started by branching off c The commits are colored based on the team that is working on that branch. The collaboration and paper icons are respectively made by `mynamepong' and `iconixar' and downloaded from \url{www.flaticon.com}. } -\end{figure} - -After a project starts, Maneage will evolve with new features or fixed bugs. -Because all projects branch from it, updating the project's low-level skeleton is as easy as merging the \inlinecode{maneage} branch into the project's branch. -For example, in Figure \ref{fig:branching}(a), see how Maneage's \inlinecode{3c05235} commit has been merged into the project's branch in commit \inlinecode{2ed0c82}. -Hence infrastructure improvements and fixes are easily propagated to all projects. - -Another useful scenario is reviving a finished/published project at a later date, possibly by other researchers as shown in Figure \ref{fig:branching}(b), e.g., assuming the original project was completed years ago, and is no longer directly executable. -Other scenarios include projects that are created by merging various other projects. 
-Modern version control systems provide many more capabilities that can be leveraged through Maneage in project management, thanks to the shared branch it has with \emph{all} derived projects, and thanks to its completeness (\ref{principle:complete}). - -\subsection{Multi-user collaboration on single build directory} -\label{sec:collaborating} - -Because the project's source and build directories are separate, an option is provided for different users to share a build directory, while working on their own separate project branches during a collaboration. -This is similar to the parallel branch that is later merged in Figure \ref{fig:branching}(a). -To enable this mode, the \inlinecode{./project} script has an option \inlinecode{--group} that must be given the name of a (POSIX) user group in the host OS. -All built files are then automatically assigned to this user group, with read and write permissions for all members. -Permission management and avoiding conflicts in the build directory (while members work on different branches) is the responsibility of the team. - -\subsection{Publishing the project} -\label{sec:publishing} - -In a scientific scenario, the final report is submitted to a journal, while in an industrial context it is submitted to the customers or employers. -To facilitate publication of the project's source with the narrative, Maneage has a \inlinecode{dist} target, which is activated with \inlinecode{./project make dist}. -In this mode, Maneage will not do any analysis, but will instead put full project's source (for the given commit, without the version history), with all the built files that are necessary for \LaTeX{}, into a compressed \inlinecode{.tar.gz} file. -This is useful for publishers to create the report without necessarily building the full project: since the full project source is included, it can be rebuilt. -The \inlinecode{dist-zip} target provides Zip compression as an alternative. 
-Depending on the built graphics used in the report, this compressed file will usually be roughly a mega-byte. - -However, the required inputs and the outputs may be much bigger, from megabytes to petabytes. -This gives two scenarios for publication of the project: 1) publishing only the source, or 2) publishing the source with the data. -In the former case, the output of \inlinecode{dist} can be submitted to the journal as a supplement, or uploaded to pre-print servers like \href{https://arXiv.org}{arXiv} that will compile the \LaTeX{} source and build their own PDFs. -The Git history can also be archived as a single ``bundle'' file and submitted as a supplement. -When publishing with datasets, the project's outputs, and/or inputs, can be published on servers like Zenodo. -For example, \citet[\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}]{akhlaghi19} uploaded all the project's required software tarballs (mentioned in the acknowledgements) and its final PDF, along with the project's source and a Git ``bundle''. - - - - - - - - - -\section{Discussion \& Caveats} -\label{sec:discussion} - -To optimally extract the potentials of big data in science, we need to have a complete view of its lineage. -Scientists are, however, rarely trained sufficiently in data management or software development, and the plethora of high-level tools that change every few years does not help. -Such high-level tools are primarily targetted at software developers, who are paid to learn them and use them effectively for short-term projects. -Scientists, on the other hand, need to focus on their own research fields, and need to think about longevity. - -The primordial implementation was written for \citet{akhlaghi15}. 
-To use in other projects without a full re-write, the skeleton was separated from the flesh as a more abstract ``template'' that was used in \citet{bacon17}, in particular Sections 4 and 7.3 (respectively in \href{http://doi.org/10.5281/zenodo.1163746}{zenodo.1163746} and \href{http://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}). -Later, software building was incorporated and used in \citet[\href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}]{akhlaghi19} and \citet[\href{https://doi.org/10.5281/zenodo.3524937}{zenodo.3524937}]{infante20}. -After this paper is published, bugs will still be found and Maneage will continue to evolve and improve, significant changes from this paper will be listed in \inlinecode{README-hacking.md}. - -Adoption of Maneage projects on a wide scale will make it possible to feed these into machine learning (ML) tools for automatic workflow generation, optimized for desired characteristics of the results. -Because Maneage is complete, algorithms and data selection methods can be optimized and by connecting the analysis directly to the narrative and history of a project, natural language processing can be studied. -Parsers can be written over Maneage-derived projects for meta-research and data provenance studies, for example to generate ``research objects''. -As another example, when a bug is found in one software package, all affected projects can be found and the scale of the effect can be measured. -Combined with SoftwareHeritage, precise high-level science parts of Maneage projects can be accurately cited (e.g., failed/abandoned tests at any historical point). -Many components of ``machine-actionable'' data management plans \citep{miksa19b} can be automatically filled out by Maneage, which is useful for project PIs and and grant funders. - -Maneage was awarded a Research Data Alliance (RDA) adoption grant for implementing the recommendations of the Publishing Data Workflows working group \citep{austin17}. 
-Maneage's user base and development grew phenomenally, highlighting caveats. -Firstly, Maneage uses very low-level tools that are not widely used by scientists, e.g., Git, \LaTeX, Make and the command line. -This is primarily because of a lack of exposure. -Witnessing the improvements in their research, many (especially early career researchers) have started mastering these tools as they adopt Maneage. -We are thus working on tutorials and improving documentation. - -Secondly, the many software packages used on various POSIX-compatible systems require maintenance. -However, because Maneage builds its software in the same Make framework as the analysis, users' experience in the analysis empowers them to add/fix their required software with the same Make tools. -This has already happened, with improvements contributed to the core Maneage branch, propagating to all projects. - -Thirdly, publishing a project's reproducible data lineage immediately after publication enables others to continue with followup papers in competition with the original authors. -We propose these solutions: -1) Through the Git history, the work added by another team at any phase of the project can be quantified, contributing to a new concept of authorship in scientific projects and helping to quantify Newton's famous ``\emph{standing on the shoulders of giants}'' quote. -This is a long-term goal and requires major changes to academic value systems. -2) Authors can be given a grace period where the journal or a third party embargoes the source, keeping it private for the embargo period and then publishing it. +\end{figure*} +\item \label{principle:freesoftware}\textbf{Free and open source software:} + Technically, reproducibility \cite{fineberg19} is possible with non-free or non-open-source software (a black box). 
+ This principle is thus necessary to complement that definition (nature is already a black box, we don't need another one): + (1) Because it is free software, others can learn from, modify, and build upon it. + (2) The lineage can be traced to free software's implemented algorithms, also enabling optimizations on that level. + (3) A free-software package that does not execute on particular hardware can be modified to work on it. + (4) A non-free software project typically cannot be distributed by others, making the whole community reliant on the owner's server (even if the owner does not ask for payments). + \emph{Comparison with existing:} The existing solutions listed above are all free software. + Based on this principle, we do not consider non-free solutions. +\end{enumerate} @@ -652,9 +232,9 @@ This is a long-term goal and requires major changes to academic value systems. +% use section* for acknowledgment +\section*{Acknowledgment} -%% Acknowledgements -\section*{Acknowledgments} The authors wish to thank (sorted alphabetically) Julia Aguilar-Cabello, Alice Allen, @@ -677,7 +257,7 @@ and Ignacio Trujillo for their useful help, suggestions and feedback on Maneage and this paper. Work on Maneage, and this paper, has been partially funded/supported by the following institutions: -The Japanese Ministry of Education, Culture, Sports, Science, and Technology ({\small MEXT}) PhD scholarship to M. Akhl\-aghi and its Grant-in-Aid for Scientific Research (21244012, 24253003). +The Japanese Ministry of Education, Culture, Sports, Science, and Technology (MEXT) PhD scholarship to M. Akhl\-aghi and its Grant-in-Aid for Scientific Research (21244012, 24253003). The European Research Council (ERC) advanced grant 339659-MUSICOS. The European Union (EU) Horizon 2020 (H2020) research and innovation programmes No 777388 under RDA EU 4.0 project, and Marie Sk\l{}odowska-Curie grant agreement No 721463 to the SUNDIAL ITN.
The State Research Agency (AEI) of the Spanish Ministry of Science, Innovation and Universities (MCIU) and the European Regional Development Fund (ERDF) under the grant AYA2016-76219-P. @@ -688,34 +268,53 @@ The Polish MNiSW grant DIR/WK/2018/12. The Pozna\'n Supercomputing and Networking Center (PSNC) computational grant 314. -\input{tex/build/macros/dependencies.tex} -\section*{Competing Interests} -The authors have no competing interests to declare. -\section*{Author Contributions} -\begin{enumerate} -\item Mohammad Akhlaghi: principal author of the Maneage source code and this paper, also principal investigator (PI) of the RDA Adoption grant awarded to Maneage. -\item Ra\'ul Infante-Sainz: contributed many commits to the source of Maneage, also involved in early testing and writing this paper. -\item Boudewijn F. Roukema: involved in Maneage and its testing, contributed to writing this paper. -\item David Valls-Gabaud: involved in the Maneage project and its testing and contributed to writing this paper. -\item Roberto Baena-Gall\'e: contributed to early testing of Maneage and in writing this paper. -\end{enumerate} -%% Tell BibLaTeX to put the bibliography list here. -\printbibliography -%% Finish LaTeX + + + +%% Bibliography +\bibliographystyle{IEEEtran} +\bibliography{IEEEabrv,/home/mohammad/documents/personal/professional/data-science/maneage/paper/source/tex/src/references} + +%% Biography +\begin{IEEEbiographynophoto}{Mohammad Akhlaghi} + is currently a big data postdoctoral researcher at the Instituto de Astrof\'isica de Canarias, Tenerife, Spain. + His main scientific interest is in early galaxy evolution, but to extract information from the modern complex datasets, he has been involved in image processing and reproducible workflow management where he has founded GNU Astronomy Utilities (Gnuastro) and Maneage. 
+ He received his PhD in astronomy from Tohoku University, Sendai, Japan, and also held a postdoc position at the Centre de Recherche Astrophysique de Lyon (CRAL). + Contact him at mohammad@akhlaghi.org and find his website at \url{https://akhlaghi.org}. +\end{IEEEbiographynophoto} + +\begin{IEEEbiographynophoto}{Ra\'ul Infante-Sainz} + is currently a doctoral student at the Instituto de Astrof\'isica de Canarias, Tenerife, Spain. + Contact him at infantesainz@gmail.com. +\end{IEEEbiographynophoto} + +\begin{IEEEbiographynophoto}{Boudewijn F. Roukema} + is currently a professor at the Astronomy and Informatics department of Nicolaus Copernicus University in Toru\'n, Poland. + Contact him at boud@astro.uni.torun.pl. +\end{IEEEbiographynophoto} + +\begin{IEEEbiographynophoto}{David Valls-Gabaud} + is currently a professor at the Observatoire de Paris, France. + Contact him at david.valls-gabaud@obspm.fr. +\end{IEEEbiographynophoto} + +\begin{IEEEbiographynophoto}{Roberto Baena-Gall\'e} + is currently a postdoctoral fellow at the Instituto de Astrof\'isica de Canarias, Tenerife, Spain. + Contact him at roberto.baena@gmail.com. +\end{IEEEbiographynophoto} + \end{document} -%% This file is part of Maneage (https://maneage.org). -% -%% This file is part of Maneage. Maneage is free software: you can -%% redistribute it and/or modify it under the terms of the GNU General -%% Public License as published by the Free Software Foundation, either -%% version 3 of the License, or (at your option) any later version. +%% This file is free software: you can redistribute it and/or modify it +%% under the terms of the GNU General Public License as published by the +%% Free Software Foundation, either version 3 of the License, or (at your +%% option) any later version.
% -%% Maneage is distributed in the hope that it will be useful, but WITHOUT +%% This file is distributed in the hope that it will be useful, but WITHOUT %% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -%% FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -%% more details. See <http://www.gnu.org/licenses/>. +%% FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +%% for more details. See <http://www.gnu.org/licenses/>. diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index e7949d0..4f2088b 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -41,7 +41,7 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex # If no PDF is requested, or if LaTeX isn't available, don't # continue to building the final PDF. Otherwise, merge all the TeX # macros into one for building the PDF. - @if [ -f .local/bin/lualatex ] && [ x"$(pdf-build-final)" != x ]; then + @if [ -f .local/bin/latex ] && [ x"$(pdf-build-final)" != x ]; then # Put a LaTeX input command for all the necessary macro files. rm -f $(mtexdir)/project.tex @@ -83,10 +83,10 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex # # NOTE: `$(mtexdir)/project.tex' is an order-only-prerequisite for # `paper.bbl'. This is because we need to run LaTeX in both the `paper.bbl' -# recipe and the `paper.pdf' recipe. But if `tex/src/references.tex' hasn't +# recipe and the `paper.pdf' recipe. But if `tex/src/references.bib' hasn't # been modified, we don't want to re-build the bibliography, only the final # PDF. -$(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ +$(texbdir)/paper.bbl: tex/src/references.bib $(mtexdir)/dependencies-bib.tex \ | $(mtexdir)/project.tex # If `$(mtexdir)/project.tex' is empty, don't build PDF.
@macros=$$(cat $(mtexdir)/project.tex) @@ -97,9 +97,9 @@ $(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir); - lualatex -shell-escape -halt-on-error $$p/paper.tex - biber paper - + latex -shell-escape -halt-on-error $$p/paper.tex + bibtex paper + latex -shell-escape -halt-on-error $$p/paper.tex fi @@ -124,7 +124,14 @@ paper.pdf: $(mtexdir)/project.tex paper.tex $(texbdir)/paper.bbl p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir) - lualatex -shell-escape -halt-on-error $$p/paper.tex + latex -shell-escape -halt-on-error $$p/paper.tex + + # Convert the DVI to PostScript, and the PostScript to PDF. The + # `-dNOSAFER' option to GhostScript allows transparencies in the + # conversion from PostScript to PDF, see + # https://www.ghostscript.com/doc/current/Language.htm#Transparency + dvips paper.dvi + ps2pdf -dNOSAFER paper.ps # Come back to the top project directory and copy the built PDF # file here. diff --git a/reproduce/software/config/TARGETS.conf b/reproduce/software/config/TARGETS.conf index 00ce387..50392fb 100644 --- a/reproduce/software/config/TARGETS.conf +++ b/reproduce/software/config/TARGETS.conf @@ -36,7 +36,10 @@ # Programs and libraries. -top-level-programs = xlsxio +# +# Ghostscript: to build PDF paper (in particular the `ps2pdf' command). +# XLSXI/O: to read and write XLSX files. +top-level-programs = ghostscript xlsxio # Python libraries/modules. -top-level-python = +top-level-python = diff --git a/reproduce/software/config/texlive-packages.conf b/reproduce/software/config/texlive-packages.conf index 6760eba..70f246e 100644 --- a/reproduce/software/config/texlive-packages.conf +++ b/reproduce/software/config/texlive-packages.conf @@ -16,12 +16,4 @@ # the basic installation scheme that we used to install tlmgr, they will be # ignored in the `tlmgr install' command, but will be used later when we # want their versions. 
-texlive-packages = tex fancyhdr ec newtx fontaxes xkeyval etoolbox xcolor \ - setspace caption footmisc datetime fmtcount titlesec \ - preprint ulem biblatex biber logreq pgf pgfplots fp \ - courier tex-gyre txfonts times csquotes kastrup \ - trimspaces pdftexcmds pdfescape letltxmacro bitset \ - mweights \ - \ - alegreya enumitem fontspec lastpage listings environ \ - tcolorbox +texlive-packages = times IEEEtran cite xcolor pgfplots courier ps2eps diff --git a/tex/img/icon-collaboration.eps b/tex/img/icon-collaboration.eps new file mode 100644 index 0000000..7c3a78c --- /dev/null +++ b/tex/img/icon-collaboration.eps @@ -0,0 +1,159 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: cairo 1.17.3 (https://cairographics.org) +%%CreationDate: Sat May 2 03:38:34 2020 +%%Pages: 1 +%%DocumentData: Clean7Bit +%%LanguageLevel: 2 +%%BoundingBox: 0 0 360 360 +%%EndComments +%%BeginProlog +50 dict begin +/q { gsave } bind def +/Q { grestore } bind def +/cm { 6 array astore concat } bind def +/w { setlinewidth } bind def +/J { setlinecap } bind def +/j { setlinejoin } bind def +/M { setmiterlimit } bind def +/d { setdash } bind def +/m { moveto } bind def +/l { lineto } bind def +/c { curveto } bind def +/h { closepath } bind def +/re { exch dup neg 3 1 roll 5 3 roll moveto 0 rlineto + 0 exch rlineto 0 rlineto closepath } bind def +/S { stroke } bind def +/f { fill } bind def +/f* { eofill } bind def +/n { newpath } bind def +/W { clip } bind def +/W* { eoclip } bind def +/BT { } bind def +/ET { } bind def +/BDC { mark 3 1 roll /BDC pdfmark } bind def +/EMC { mark /EMC pdfmark } bind def +/cairo_store_point { /cairo_point_y exch def /cairo_point_x exch def } def +/Tj { show currentpoint cairo_store_point } bind def +/TJ { + { + dup + type /stringtype eq + { show } { -0.001 mul 0 cairo_font_matrix dtransform rmoveto } ifelse + } forall + currentpoint cairo_store_point +} bind def +/cairo_selectfont { cairo_font_matrix aload pop pop pop 0 0 6 array astore + cairo_font exch selectfont 
cairo_point_x cairo_point_y moveto } bind def +/Tf { pop /cairo_font exch def /cairo_font_matrix where + { pop cairo_selectfont } if } bind def +/Td { matrix translate cairo_font_matrix matrix concatmatrix dup + /cairo_font_matrix exch def dup 4 get exch 5 get cairo_store_point + /cairo_font where { pop cairo_selectfont } if } bind def +/Tm { 2 copy 8 2 roll 6 array astore /cairo_font_matrix exch def + cairo_store_point /cairo_font where { pop cairo_selectfont } if } bind def +/g { setgray } bind def +/rg { setrgbcolor } bind def +/d1 { setcachedevice } bind def +/cairo_data_source { + CairoDataIndex CairoData length lt + { CairoData CairoDataIndex get /CairoDataIndex CairoDataIndex 1 add def } + { () } ifelse +} def +/cairo_flush_ascii85_file { cairo_ascii85_file status { cairo_ascii85_file flushfile } if } def +/cairo_image { image cairo_flush_ascii85_file } def +/cairo_imagemask { imagemask cairo_flush_ascii85_file } def +%%EndProlog +%%BeginSetup +%%EndSetup +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 0 0 360 360 +%%EndPageSetup +q 0 0 360 360 rectclip +1 0 0 -1 0 360 cm q +0 g +180 0 m 160.117 0 144 16.117 144 36 c 144 55.883 160.117 72 180 72 c 199.875 + 71.98 215.98 55.875 216 36 c 216 16.117 199.883 0 180 0 c h +180 60 m 166.746 60 156 49.254 156 36 c 156 22.746 166.746 12 180 12 c +193.254 12 204 22.746 204 36 c 204 49.254 193.254 60 180 60 c h +180 60 m f +204 78 m 202.586 78 l 201.316 78.629 l 187.891 85.316 172.109 85.316 158.684 + 78.629 c 157.414 78 l 156 78 l 139.441 78.02 126.02 91.441 126 108 c 126 + 144 l 126 153.941 134.059 162 144 162 c 216 162 l 225.941 162 234 153.941 + 234 144 c 234 108 l 233.98 91.441 220.559 78.02 204 78 c h +222 144 m 222 147.312 219.312 150 216 150 c 144 150 l 140.688 150 138 147.312 + 138 144 c 138 108 l 138 98.555 145.301 90.719 154.723 90.047 c 170.754 +97.488 189.246 97.488 205.277 90.047 c 214.699 90.719 222 98.555 222 108 + c h +222 144 m f +306 198 m 286.117 198 270 214.117 270 234 c 270 253.883 286.117 270 
306 + 270 c 325.883 270 342 253.883 342 234 c 341.98 214.125 325.875 198.02 306 + 198 c h +306 258 m 292.746 258 282 247.254 282 234 c 282 220.746 292.746 210 306 + 210 c 319.254 210 330 220.746 330 234 c 330 247.254 319.254 258 306 258 + c h +306 258 m f +330 276 m 328.586 276 l 327.316 276.602 l 313.891 283.285 298.109 283.285 + 284.684 276.602 c 283.414 276 l 282 276 l 265.441 276.02 252.02 289.441 + 252 306 c 252 342 l 252 351.941 260.059 360 270 360 c 342 360 l 351.941 + 360 360 351.941 360 342 c 360 306 l 359.98 289.441 346.559 276.02 330 276 + c h +348 342 m 348 345.312 345.312 348 342 348 c 270 348 l 266.688 348 264 345.312 + 264 342 c 264 306 l 264 296.555 271.301 288.719 280.723 288.047 c 296.754 + 295.488 315.246 295.488 331.277 288.047 c 340.699 288.719 348 296.555 348 + 306 c h +348 342 m f +54 198 m 34.117 198 18 214.117 18 234 c 18 253.883 34.117 270 54 270 c +73.883 270 90 253.883 90 234 c 89.98 214.125 73.875 198.02 54 198 c h +54 258 m 40.746 258 30 247.254 30 234 c 30 220.746 40.746 210 54 210 c +67.254 210 78 220.746 78 234 c 78 247.254 67.254 258 54 258 c h +54 258 m f +78 276 m 76.586 276 l 75.316 276.602 l 61.891 283.285 46.109 283.285 32.684 + 276.602 c 31.414 276 l 30 276 l 13.441 276.02 0.02 289.441 0 306 c 0 342 + l 0 351.941 8.059 360 18 360 c 90 360 l 99.941 360 108 351.941 108 342 +c 108 306 l 107.98 289.441 94.559 276.02 78 276 c h +96 342 m 96 345.312 93.312 348 90 348 c 18 348 l 14.688 348 12 345.312 +12 342 c 12 306 l 12 296.555 19.301 288.719 28.723 288.047 c 44.754 295.488 + 63.246 295.488 79.277 288.047 c 88.699 288.719 96 296.555 96 306 c h +96 342 m f +239.527 304.707 m 239.527 304.699 239.523 304.695 239.52 304.688 c 238.223 + 301.637 234.695 300.219 231.648 301.52 c 204.555 312.871 174.465 314.945 + 146.07 307.41 c 151.688 305.762 l 154.867 304.828 156.691 301.496 155.762 + 298.312 c 154.828 295.133 151.496 293.309 148.316 294.238 c 124.316 301.277 + l 124.227 301.277 124.16 301.379 124.074 301.41 c 123.52 301.617 122.996 + 
301.906 122.527 302.27 c 122.316 302.379 122.117 302.508 121.926 302.645 + c 121.293 303.211 120.793 303.906 120.457 304.688 c 120.133 305.469 119.977 + 306.316 120 307.164 c 120.148 307.977 120.348 308.777 120.602 309.562 c + 120.637 309.648 120.602 309.742 120.668 309.828 c 132.668 332.789 l 134.203 + 335.73 137.828 336.867 140.766 335.336 c 143.707 333.801 144.844 330.172 + 143.312 327.234 c 138.426 317.898 l 170.688 327.594 205.324 325.711 236.34 + 312.57 c 239.391 311.277 240.82 307.758 239.527 304.707 c h +239.527 304.707 m f +340.242 157.758 m 337.898 155.414 334.102 155.414 331.758 157.758 c 323.27 + 166.246 l 318.301 113.695 284.934 68.09 236.352 47.449 c 233.305 46.148 + 229.777 47.566 228.48 50.617 c 227.18 53.664 228.598 57.188 231.648 58.488 + c 275.23 77.012 305.527 117.488 311.012 164.527 c 304.242 157.758 l 301.859 + 155.457 298.059 155.523 295.758 157.906 c 293.512 160.23 293.512 163.918 + 295.758 166.242 c 313.758 184.242 l 316.098 186.59 319.898 186.594 322.242 + 184.254 c 322.246 184.25 322.25 184.246 322.254 184.242 c 340.254 166.242 + l 342.594 163.895 342.59 160.098 340.242 157.758 c h +340.242 157.758 m f +131.824 51.953 m 131.805 51.605 131.754 51.258 131.672 50.914 c 131.672 + 50.863 131.605 50.824 131.578 50.766 c 131.555 50.707 131.578 50.664 131.578 + 50.617 c 131.418 50.324 131.227 50.047 131.016 49.789 c 130.82 49.438 130.594 + 49.105 130.332 48.797 c 129.977 48.484 129.586 48.215 129.16 48 c 128.953 + 47.82 128.727 47.656 128.488 47.516 c 104.488 36.551 l 101.426 35.293 97.918 + 36.758 96.66 39.824 c 95.469 42.723 96.711 46.051 99.512 47.461 c 111.938 + 53.141 l 65.156 78.18 35.965 126.941 36 180 c 36 183.312 38.688 186 42 +186 c 45.312 186 48 183.312 48 180 c 47.969 133.281 72.676 90.035 112.938 + 66.336 c 108.602 75.402 l 107.168 78.383 108.422 81.961 111.402 83.398 +c 112.211 83.797 113.098 84 114 84 c 116.305 83.996 118.402 82.676 119.398 + 80.598 c 131.398 55.559 l 131.398 55.5 131.398 55.434 131.449 55.375 c +131.605 54.965 
131.711 54.535 131.773 54.102 c 131.875 53.777 131.953 53.449 + 132 53.113 c 131.98 52.723 131.922 52.332 131.824 51.953 c h +131.824 51.953 m f +Q Q +showpage +%%Trailer +end +%%EOF diff --git a/tex/img/icon-collaboration.svg b/tex/img/icon-collaboration.svg new file mode 100644 index 0000000..32c42f7 --- /dev/null +++ b/tex/img/icon-collaboration.svg @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tex/img/icon-complete.eps b/tex/img/icon-complete.eps new file mode 100644 index 0000000..301df2b --- /dev/null +++ b/tex/img/icon-complete.eps @@ -0,0 +1,162 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: cairo 1.17.3 (https://cairographics.org) +%%CreationDate: Sat May 2 03:38:53 2020 +%%Pages: 1 +%%DocumentData: Clean7Bit +%%LanguageLevel: 2 +%%BoundingBox: 24 12 361 372 +%%EndComments +%%BeginProlog +50 dict begin +/q { gsave } bind def +/Q { grestore } bind def +/cm { 6 array astore concat } bind def +/w { setlinewidth } bind def +/J { setlinecap } bind def +/j { setlinejoin } bind def +/M { setmiterlimit } bind def +/d { setdash } bind def +/m { moveto } bind def +/l { lineto } bind def +/c { curveto } bind def +/h { closepath } bind def +/re { exch dup neg 3 1 roll 5 3 roll moveto 0 rlineto + 0 exch rlineto 0 rlineto closepath } bind def +/S { stroke } bind def +/f { fill } bind def +/f* { eofill } bind def +/n { newpath } bind def +/W { clip } bind def +/W* { eoclip } bind def +/BT { } bind def +/ET { } bind def +/BDC { mark 3 1 roll /BDC pdfmark } bind def +/EMC { mark /EMC pdfmark } bind def +/cairo_store_point { /cairo_point_y exch def /cairo_point_x exch def } def +/Tj { show currentpoint cairo_store_point } bind def +/TJ { + { + dup + type /stringtype eq + { show } { -0.001 mul 0 cairo_font_matrix dtransform rmoveto } ifelse + } forall + currentpoint cairo_store_point +} bind def +/cairo_selectfont { cairo_font_matrix aload 
pop pop pop 0 0 6 array astore + cairo_font exch selectfont cairo_point_x cairo_point_y moveto } bind def +/Tf { pop /cairo_font exch def /cairo_font_matrix where + { pop cairo_selectfont } if } bind def +/Td { matrix translate cairo_font_matrix matrix concatmatrix dup + /cairo_font_matrix exch def dup 4 get exch 5 get cairo_store_point + /cairo_font where { pop cairo_selectfont } if } bind def +/Tm { 2 copy 8 2 roll 6 array astore /cairo_font_matrix exch def + cairo_store_point /cairo_font where { pop cairo_selectfont } if } bind def +/g { setgray } bind def +/rg { setrgbcolor } bind def +/d1 { setcachedevice } bind def +/cairo_data_source { + CairoDataIndex CairoData length lt + { CairoData CairoDataIndex get /CairoDataIndex CairoDataIndex 1 add def } + { () } ifelse +} def +/cairo_flush_ascii85_file { cairo_ascii85_file status { cairo_ascii85_file flushfile } if } def +/cairo_image { image cairo_flush_ascii85_file } def +/cairo_imagemask { imagemask cairo_flush_ascii85_file } def +%%EndProlog +%%BeginSetup +%%EndSetup +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 24 12 361 372 +%%EndPageSetup +q 24 12 337 360 rectclip +1 0 0 -1 0 384 cm q +0.913725 0.933333 0.94902 rg +282 366 m 54 366 l 40.746 366 30 355.254 30 342 c 30 42 l 30 28.746 40.746 + 18 54 18 c 234 18 l 306 90 l 306 342 l 306 355.254 295.254 366 282 366 +c h +282 366 m f +0.764706 0.776471 0.780392 rg +306 90 m 258 90 l 244.746 90 234 79.254 234 66 c 234 18 l h +306 90 m f +0.458824 0.670588 0.490196 rg +354 288 m 354 331.078 319.078 366 276 366 c 232.922 366 198 331.078 198 + 288 c 198 244.922 232.922 210 276 210 c 319.078 210 354 244.922 354 288 + c h +354 288 m f +0 g +54 372 m 276 372 l 315.281 372.004 349.32 344.781 357.949 306.457 c 366.578 + 268.133 347.492 228.949 312 212.117 c 312 90 l 312 88.41 311.367 86.883 + 310.242 85.758 c 238.242 13.758 l 237.117 12.633 235.59 12 234 12 c 54 +12 l 37.438 12.02 24.02 25.438 24 42 c 24 342 l 24.02 358.562 37.438 371.98 + 54 372 c h +348 288 m 348 
317.121 330.457 343.375 303.555 354.52 c 276.648 365.664 +245.68 359.504 225.09 338.91 c 204.496 318.32 198.336 287.352 209.48 260.445 + c 220.625 233.543 246.879 216 276 216 c 315.746 216.043 347.957 248.254 + 348 288 c h +240 32.484 m 291.516 84 l 258 84 l 248.062 83.988 240.012 75.938 240 66 + c h +36 42 m 36.012 32.062 44.062 24.012 54 24 c 228 24 l 228 66 l 228.02 82.562 + 241.438 95.98 258 96 c 300 96 l 300 207.5 l 259.438 195.398 216.207 215.371 + 199.133 254.105 c 182.059 292.836 196.48 338.223 232.777 360 c 54 360 l + 44.062 359.988 36.012 351.938 36 342 c h +36 42 m f +120 126 m 120 129.312 122.688 132 126 132 c 270 132 l 273.312 132 276 129.312 + 276 126 c 276 122.688 273.312 120 270 120 c 126 120 l 122.688 120 120 122.688 + 120 126 c h +120 126 m f +66 168 m 186 168 l 189.312 168 192 165.312 192 162 c 192 158.688 189.312 + 156 186 156 c 66 156 l 62.688 156 60 158.688 60 162 c 60 165.312 62.688 + 168 66 168 c h +66 168 m f +210 192 m 162 192 l 158.688 192 156 194.688 156 198 c 156 201.312 158.688 + 204 162 204 c 210 204 l 213.312 204 216 201.312 216 198 c 216 194.688 213.312 + 192 210 192 c h +210 192 m f +66 240 m 174 240 l 177.312 240 180 237.312 180 234 c 180 230.688 177.312 + 228 174 228 c 66 228 l 62.688 228 60 230.688 60 234 c 60 237.312 62.688 + 240 66 240 c h +66 240 m f +66 132 m 102 132 l 105.312 132 108 129.312 108 126 c 108 122.688 105.312 + 120 102 120 c 66 120 l 62.688 120 60 122.688 60 126 c 60 129.312 62.688 + 132 66 132 c h +66 132 m f +270 156 m 210 156 l 206.688 156 204 158.688 204 162 c 204 165.312 206.688 + 168 210 168 c 270 168 l 273.312 168 276 165.312 276 162 c 276 158.688 273.312 + 156 270 156 c h +270 156 m f +66 204 m 138 204 l 141.312 204 144 201.312 144 198 c 144 194.688 141.312 + 192 138 192 c 66 192 l 62.688 192 60 194.688 60 198 c 60 201.312 62.688 + 204 66 204 c h +66 204 m f +162 300 m 138 300 l 134.688 300 132 302.688 132 306 c 132 309.312 134.688 + 312 138 312 c 162 312 l 165.312 312 168 309.312 168 306 c 168 302.688 
165.312 + 300 162 300 c h +162 300 m f +66 312 m 114 312 l 117.312 312 120 309.312 120 306 c 120 302.688 117.312 + 300 114 300 c 66 300 l 62.688 300 60 302.688 60 306 c 60 309.312 62.688 + 312 66 312 c h +66 312 m f +66 276 m 102 276 l 105.312 276 108 273.312 108 270 c 108 266.688 105.312 + 264 102 264 c 66 264 l 62.688 264 60 266.688 60 270 c 60 273.312 62.688 + 276 66 276 c h +66 276 m f +66 96 m 138 96 l 141.312 96 144 93.312 144 90 c 144 86.688 141.312 84 138 + 84 c 66 84 l 62.688 84 60 86.688 60 90 c 60 93.312 62.688 96 66 96 c h +66 96 m f +162 264 m 126 264 l 122.688 264 120 266.688 120 270 c 120 273.312 122.688 + 276 126 276 c 162 276 l 165.312 276 168 273.312 168 270 c 168 266.688 165.312 + 264 162 264 c h +162 264 m f +244.242 301.758 m 241.898 299.414 238.102 299.414 235.758 301.758 c 233.414 + 304.102 233.414 307.898 235.758 310.242 c 259.758 334.242 l 260.883 335.367 + 262.41 336 264 336 c 264.195 336 264.395 335.992 264.59 335.973 c 266.379 + 335.793 267.996 334.824 268.992 333.328 c 316.992 261.328 l 318.809 258.57 + 318.059 254.867 315.312 253.035 c 312.566 251.203 308.855 251.934 307.008 + 254.672 c 263.07 320.582 l h +244.242 301.758 m f +Q Q +showpage +%%Trailer +end +%%EOF diff --git a/tex/img/icon-complete.svg b/tex/img/icon-complete.svg new file mode 100644 index 0000000..3b3703e --- /dev/null +++ b/tex/img/icon-complete.svg @@ -0,0 +1,2 @@ + + diff --git a/tex/img/icon-processing.eps b/tex/img/icon-processing.eps new file mode 100644 index 0000000..ea7bb70 --- /dev/null +++ b/tex/img/icon-processing.eps @@ -0,0 +1,212 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: cairo 1.17.3 (https://cairographics.org) +%%CreationDate: Sat May 2 03:39:08 2020 +%%Pages: 1 +%%DocumentData: Clean7Bit +%%LanguageLevel: 2 +%%BoundingBox: 24 12 361 372 +%%EndComments +%%BeginProlog +50 dict begin +/q { gsave } bind def +/Q { grestore } bind def +/cm { 6 array astore concat } bind def +/w { setlinewidth } bind def +/J { setlinecap } bind def +/j { setlinejoin } bind def 
+/M { setmiterlimit } bind def +/d { setdash } bind def +/m { moveto } bind def +/l { lineto } bind def +/c { curveto } bind def +/h { closepath } bind def +/re { exch dup neg 3 1 roll 5 3 roll moveto 0 rlineto + 0 exch rlineto 0 rlineto closepath } bind def +/S { stroke } bind def +/f { fill } bind def +/f* { eofill } bind def +/n { newpath } bind def +/W { clip } bind def +/W* { eoclip } bind def +/BT { } bind def +/ET { } bind def +/BDC { mark 3 1 roll /BDC pdfmark } bind def +/EMC { mark /EMC pdfmark } bind def +/cairo_store_point { /cairo_point_y exch def /cairo_point_x exch def } def +/Tj { show currentpoint cairo_store_point } bind def +/TJ { + { + dup + type /stringtype eq + { show } { -0.001 mul 0 cairo_font_matrix dtransform rmoveto } ifelse + } forall + currentpoint cairo_store_point +} bind def +/cairo_selectfont { cairo_font_matrix aload pop pop pop 0 0 6 array astore + cairo_font exch selectfont cairo_point_x cairo_point_y moveto } bind def +/Tf { pop /cairo_font exch def /cairo_font_matrix where + { pop cairo_selectfont } if } bind def +/Td { matrix translate cairo_font_matrix matrix concatmatrix dup + /cairo_font_matrix exch def dup 4 get exch 5 get cairo_store_point + /cairo_font where { pop cairo_selectfont } if } bind def +/Tm { 2 copy 8 2 roll 6 array astore /cairo_font_matrix exch def + cairo_store_point /cairo_font where { pop cairo_selectfont } if } bind def +/g { setgray } bind def +/rg { setrgbcolor } bind def +/d1 { setcachedevice } bind def +/cairo_data_source { + CairoDataIndex CairoData length lt + { CairoData CairoDataIndex get /CairoDataIndex CairoDataIndex 1 add def } + { () } ifelse +} def +/cairo_flush_ascii85_file { cairo_ascii85_file status { cairo_ascii85_file flushfile } if } def +/cairo_image { image cairo_flush_ascii85_file } def +/cairo_imagemask { imagemask cairo_flush_ascii85_file } def +%%EndProlog +%%BeginSetup +%%EndSetup +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 24 12 361 372 +%%EndPageSetup +q 24 12 337 360 
rectclip +1 0 0 -1 0 384 cm q +0.913725 0.933333 0.94902 rg +282 366 m 54 366 l 40.746 366 30 355.254 30 342 c 30 42 l 30 28.746 40.746 + 18 54 18 c 234 18 l 306 90 l 306 342 l 306 355.254 295.254 366 282 366 +c h +282 366 m f +0.764706 0.776471 0.780392 rg +306 90 m 258 90 l 244.746 90 234 79.254 234 66 c 234 18 l h +306 90 m f +0.431373 0.823529 0.941176 rg +354 288 m 354 331.078 319.078 366 276 366 c 232.922 366 198 331.078 198 + 288 c 198 244.922 232.922 210 276 210 c 319.078 210 354 244.922 354 288 + c h +354 288 m f +0 g +54 372 m 276 372 l 315.281 372.004 349.32 344.781 357.949 306.457 c 366.578 + 268.133 347.492 228.949 312 212.117 c 312 90 l 312 88.41 311.367 86.883 + 310.242 85.758 c 238.242 13.758 l 237.117 12.633 235.59 12 234 12 c 54 +12 l 37.438 12.02 24.02 25.438 24 42 c 24 342 l 24.02 358.562 37.438 371.98 + 54 372 c h +348 288 m 348 317.121 330.457 343.375 303.555 354.52 c 276.648 365.664 +245.68 359.504 225.09 338.91 c 204.496 318.32 198.336 287.352 209.48 260.445 + c 220.625 233.543 246.879 216 276 216 c 315.746 216.043 347.957 248.254 + 348 288 c h +240 32.484 m 291.516 84 l 258 84 l 248.062 83.988 240.012 75.938 240 66 + c h +36 42 m 36.012 32.062 44.062 24.012 54 24 c 228 24 l 228 66 l 228.02 82.562 + 241.438 95.98 258 96 c 300 96 l 300 207.5 l 259.438 195.398 216.207 215.371 + 199.133 254.105 c 182.059 292.836 196.48 338.223 232.777 360 c 54 360 l + 44.062 359.988 36.012 351.938 36 342 c h +36 42 m f +120 126 m 120 129.312 122.688 132 126 132 c 270 132 l 273.312 132 276 129.312 + 276 126 c 276 122.688 273.312 120 270 120 c 126 120 l 122.688 120 120 122.688 + 120 126 c h +120 126 m f +66 168 m 186 168 l 189.312 168 192 165.312 192 162 c 192 158.688 189.312 + 156 186 156 c 66 156 l 62.688 156 60 158.688 60 162 c 60 165.312 62.688 + 168 66 168 c h +66 168 m f +210 192 m 162 192 l 158.688 192 156 194.688 156 198 c 156 201.312 158.688 + 204 162 204 c 210 204 l 213.312 204 216 201.312 216 198 c 216 194.688 213.312 + 192 210 192 c h +210 192 m f +66 
240 m 174 240 l 177.312 240 180 237.312 180 234 c 180 230.688 177.312 + 228 174 228 c 66 228 l 62.688 228 60 230.688 60 234 c 60 237.312 62.688 + 240 66 240 c h +66 240 m f +66 132 m 102 132 l 105.312 132 108 129.312 108 126 c 108 122.688 105.312 + 120 102 120 c 66 120 l 62.688 120 60 122.688 60 126 c 60 129.312 62.688 + 132 66 132 c h +66 132 m f +270 156 m 210 156 l 206.688 156 204 158.688 204 162 c 204 165.312 206.688 + 168 210 168 c 270 168 l 273.312 168 276 165.312 276 162 c 276 158.688 273.312 + 156 270 156 c h +270 156 m f +66 204 m 138 204 l 141.312 204 144 201.312 144 198 c 144 194.688 141.312 + 192 138 192 c 66 192 l 62.688 192 60 194.688 60 198 c 60 201.312 62.688 + 204 66 204 c h +66 204 m f +162 300 m 138 300 l 134.688 300 132 302.688 132 306 c 132 309.312 134.688 + 312 138 312 c 162 312 l 165.312 312 168 309.312 168 306 c 168 302.688 165.312 + 300 162 300 c h +162 300 m f +66 312 m 114 312 l 117.312 312 120 309.312 120 306 c 120 302.688 117.312 + 300 114 300 c 66 300 l 62.688 300 60 302.688 60 306 c 60 309.312 62.688 + 312 66 312 c h +66 312 m f +66 276 m 102 276 l 105.312 276 108 273.312 108 270 c 108 266.688 105.312 + 264 102 264 c 66 264 l 62.688 264 60 266.688 60 270 c 60 273.312 62.688 + 276 66 276 c h +66 276 m f +66 96 m 138 96 l 141.312 96 144 93.312 144 90 c 144 86.688 141.312 84 138 + 84 c 66 84 l 62.688 84 60 86.688 60 90 c 60 93.312 62.688 96 66 96 c h +66 96 m f +162 264 m 126 264 l 122.688 264 120 266.688 120 270 c 120 273.312 122.688 + 276 126 276 c 162 276 l 165.312 276 168 273.312 168 270 c 168 266.688 165.312 + 264 162 264 c h +162 264 m f +228.582 261.426 m 227.16 264.418 228.434 267.996 231.426 269.418 c 234.418 + 270.84 237.996 269.566 239.418 266.574 c 245.602 253.535 258.969 245.449 + 273.387 246.023 c 287.805 246.602 300.488 255.73 305.605 269.223 c 299.285 + 265.008 l 296.531 263.211 292.84 263.965 291.016 266.703 c 289.191 269.441 + 289.91 273.141 292.629 274.992 c 310.629 286.992 l 310.645 287 l 310.867 + 287.145 311.098 
287.277 311.336 287.391 c 311.387 287.414 311.438 287.441 + 311.488 287.465 c 311.727 287.574 311.969 287.668 312.219 287.746 c 312.258 + 287.758 312.301 287.766 312.34 287.777 c 312.551 287.832 312.762 287.879 + 312.973 287.914 c 313.051 287.93 313.125 287.941 313.199 287.949 c 313.434 + 287.98 313.672 287.996 313.906 288 c 313.957 288 l 313.957 288 313.984 +288 314 288 c 314.215 288 314.426 287.984 314.637 287.961 c 314.695 287.957 + 314.754 287.945 314.812 287.934 c 314.984 287.91 315.148 287.883 315.312 + 287.844 c 315.375 287.828 315.441 287.812 315.5 287.797 c 315.664 287.754 + 315.82 287.703 315.977 287.645 c 316.035 287.625 316.098 287.602 316.156 + 287.578 c 316.32 287.516 316.48 287.441 316.637 287.363 c 316.684 287.34 + 316.73 287.32 316.777 287.297 c 316.973 287.195 317.16 287.078 317.34 286.957 + c 317.395 286.922 317.445 286.879 317.5 286.84 c 317.625 286.746 317.75 + 286.648 317.871 286.547 c 317.934 286.492 317.996 286.434 318.059 286.375 + c 318.172 286.27 318.277 286.16 318.383 286.047 c 318.43 285.992 318.48 + 285.941 318.531 285.887 c 318.684 285.711 318.824 285.523 318.953 285.332 + c 330.953 267.332 l 332.77 264.574 332.02 260.871 329.273 259.039 c 326.527 + 257.207 322.816 257.938 320.969 260.676 c 317.281 266.211 l 310.871 247.816 + 293.984 235.121 274.535 234.07 c 255.086 233.023 236.934 243.832 228.586 + 261.43 c h +228.582 261.426 m f +320.574 306.582 m 319.137 305.898 317.488 305.812 315.988 306.348 c 314.488 + 306.879 313.262 307.988 312.582 309.426 c 306.398 322.465 293.031 330.551 + 278.613 329.977 c 264.195 329.398 251.512 320.27 246.395 306.777 c 252.715 + 310.992 l 255.469 312.789 259.16 312.035 260.984 309.297 c 262.809 306.559 + 262.09 302.859 259.371 301.008 c 241.371 289.008 l 241.352 288.996 241.328 + 288.984 241.309 288.969 c 241.164 288.875 241.016 288.785 240.863 288.703 + c 240.805 288.676 240.746 288.648 240.688 288.621 c 240.57 288.562 240.453 + 288.504 240.332 288.453 c 240.25 288.422 240.164 288.395 240.078 288.363 + c 
239.98 288.328 239.883 288.289 239.781 288.258 c 239.676 288.23 239.562 + 288.203 239.457 288.176 c 239.375 288.156 239.297 288.133 239.215 288.117 + c 239.078 288.09 238.945 288.07 238.809 288.055 c 238.75 288.047 238.691 + 288.035 238.633 288.031 c 238.457 288.012 238.285 288.008 238.113 288.004 + c 238.09 288.004 238.066 288.004 238.039 288.004 c 238.031 288.004 l 237.832 + 288.004 237.629 288.016 237.434 288.035 c 237.344 288.043 237.258 288.062 + 237.168 288.074 c 237.066 288.09 236.961 288.102 236.859 288.121 c 236.719 + 288.148 236.582 288.188 236.445 288.227 c 236.395 288.238 236.344 288.25 + 236.297 288.262 c 236.168 288.301 236.043 288.352 235.922 288.398 c 235.863 + 288.418 235.805 288.438 235.75 288.461 c 235.641 288.504 235.543 288.555 + 235.438 288.605 c 235.363 288.641 235.289 288.676 235.215 288.715 c 235.137 + 288.754 235.055 288.805 234.977 288.852 c 234.898 288.898 234.797 288.957 + 234.707 289.016 c 234.648 289.055 234.594 289.102 234.535 289.145 c 234.434 + 289.219 234.332 289.285 234.234 289.367 c 234.188 289.406 234.145 289.449 + 234.098 289.488 c 234 289.578 233.898 289.664 233.805 289.758 c 233.711 + 289.852 233.633 289.941 233.551 290.035 c 233.508 290.086 233.457 290.133 + 233.414 290.184 c 233.285 290.34 233.164 290.504 233.055 290.672 c 233.055 + 290.676 l 221.055 308.676 l 220.16 310 219.832 311.625 220.141 313.191 +c 220.449 314.758 221.371 316.137 222.699 317.023 c 224.027 317.906 225.656 + 318.227 227.219 317.91 c 228.785 317.594 230.16 316.664 231.039 315.332 + c 234.727 309.797 l 241.137 328.191 258.023 340.887 277.473 341.934 c 296.922 + 342.984 315.074 332.176 323.422 314.578 c 324.105 313.141 324.191 311.488 + 323.656 309.988 c 323.121 308.488 322.012 307.262 320.574 306.582 c h +320.574 306.582 m f +Q Q +showpage +%%Trailer +end +%%EOF diff --git a/tex/img/icon-processing.svg b/tex/img/icon-processing.svg new file mode 100644 index 0000000..b75b312 --- /dev/null +++ b/tex/img/icon-processing.svg @@ -0,0 +1 @@ + \ No newline at 
end of file diff --git a/tex/src/figure-branching.tex b/tex/src/figure-branching.tex index a917987..52a6303 100644 --- a/tex/src/figure-branching.tex +++ b/tex/src/figure-branching.tex @@ -27,6 +27,11 @@ + + + + + \begin{tikzpicture} %% Just for a reference (so the image size always remains fixed). It also @@ -71,13 +76,13 @@ \branchcommit{red!60!green}{4.5cm}{5cm}{f69e1f4} \branchcommit{red!60!green}{4.5cm}{6cm}{716b56b} \node[inner sep=0pt] at (3.92cm,5.55cm) - {\includegraphics[width=6.5mm]{tex/img/icon-collaboration.pdf}}; + {\includegraphics[width=6.5mm]{tex/img/icon-collaboration.eps}}; %% Paper being processed icon. The white rectangle over it is to blend it %% into the background. \node[anchor=south,inner sep=0pt] at (3.92cm,6.7cm) - {\includegraphics[width=1cm]{tex/img/icon-processing.pdf}}; - \draw[white, fill=white, opacity=0.7] (3.42cm,6.7) rectangle (5cm,7.7cm); + {\includegraphics[width=1cm]{tex/img/icon-processing.eps}}; + \draw[white, fill=white, opacity=0.7] (3.42cm,6.7) rectangle (5cm,7.8cm); %% Description of this scenario: \draw [rounded corners, fill=black!10!white] (3.1cm,0) rectangle (7.5cm,1.25cm); @@ -131,7 +136,7 @@ %% Project commits. \branchcommit{red!60!green}{11cm}{2.5cm}{4483a81} \branchcommit{red!60!green}{11cm}{4.5cm}{\projectversion} - \node[anchor=south, inner sep=0pt, color=white] at (11.05cm,4.8cm) {\includegraphics[width=1cm]{tex/img/icon-complete.pdf}}; + \node[anchor=south, inner sep=0pt, color=white] at (11.05cm,4.8cm) {\includegraphics[width=1cm]{tex/img/icon-complete.eps}}; %% Derivate project commits. 
\branchcommit{purple!60!yellow}{12.5cm}{5cm}{b177c7e} diff --git a/tex/src/preamble-pgfplots.tex b/tex/src/preamble-pgfplots.tex index 0a7b5d5..eabdeb7 100644 --- a/tex/src/preamble-pgfplots.tex +++ b/tex/src/preamble-pgfplots.tex @@ -84,7 +84,7 @@ \tikzsetnextfilename{#1}% \input{tex/src/#1.tex}% \else - \includegraphics[width=\linewidth]{tex/tikz/#1.pdf} + \includegraphics[width=\linewidth]{tex/tikz/#1.eps} \fi } @@ -92,15 +92,17 @@ -%% Uncomment the following lines for EPS and PS images. Note that you still -%% have to use the `pdflatex' executable and also add a `[dvips]' option to -%% graphicx. - -%% \tikzset{external/system call={rm -f "\image".eps "\image".ps -%% "\image".dvi; latex \tikzexternalcheckshellescape -halt-on-error -%% -interaction=batchmode -jobname "\image" "\texsource"; -%% dvips -o "\image".ps "\image".dvi; -%% ps2eps "\image.ps"}} +%% The following lines make TikZ save its externalized images as EPS and +%% PS files (built through `latex', `dvips' and `ps2eps'). +\tikzset{ + external/system call={ + rm -f "\image".eps "\image".ps "\image".dvi; + latex \tikzexternalcheckshellescape -halt-on-error + -interaction=batchmode -jobname "\image" "\texsource"; + dvips -o "\image".ps "\image".dvi; + ps2eps "\image.ps" + } +} diff --git a/tex/src/preamble-project.tex b/tex/src/preamble-project.tex new file mode 100644 index 0000000..c4d7feb --- /dev/null +++ b/tex/src/preamble-project.tex @@ -0,0 +1,27 @@ +%% Import graphics +\usepackage{graphicx} + +%% IEEEtran V1.6 and later pre-defines the format of the cite.sty package +%% \cite{} output to follow that of the IEEE. +\usepackage{cite} + +%% For the `\url' command. +\usepackage{url} + +% correct bad hyphenation here +\hyphenation{op-tical net-works semi-conduc-tor} + +%% To use colors. +\usepackage{xcolor} + +%% To have links. 
+\usepackage[ + colorlinks, + urlcolor=gray, + citecolor=gray, + linkcolor=gray, + linktocpage]{hyperref} +\renewcommand\UrlFont{\rmfamily} + +%% To have typewriter font +\usepackage{courier} diff --git a/tex/src/references.bib b/tex/src/references.bib new file mode 100644 index 0000000..e19ec16 --- /dev/null +++ b/tex/src/references.bib @@ -0,0 +1,1772 @@ +@ARTICLE{clement19, + author = {Cl\'ement-Fontaine, M\'elanie and Di Cosmo, Roberto and Guerry, Bastien and Moreau, Patrick and Pellegrini, Fran\c cois}, + title = {Encouraging a wider usage of software derived from research}, + year = {2019}, + journal = {Archives ouvertes HAL}, + volume = {}, + pages = {\href{https://hal.archives-ouvertes.fr/hal-02545142}{hal-02545142}}, +} + + + + + +@ARTICLE{dicosmo20, + author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, + title = "{Referencing Source Code Artifacts: a Separate Concern in Software Citation}", + journal = {Computing in Science \& Engineering}, + year = 2020, + volume = 22, + eid = {arXiv:2001.08647}, + pages = {33}, +archivePrefix = {arXiv}, + eprint = {2001.08647}, + primaryClass = {cs.DL}, + doi = {10.1109/MCSE.2019.2963148}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200108647D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{menke20, + author = {Joe Menke and Martijn Roelandse and Burak Ozyurt and Maryann Martone and Anita Bandrowski}, + title = {Rigor and Transparency Index, a new metric of quality for assessing biological and medical science methods}, + year = {2020}, + journal = {bioRxiv}, + volume = {}, + pages = {2020.01.15.908111}, + doi = {10.1101/2020.01.15.908111}, +} + + + + + +@ARTICLE{konkol20, + author = {{Konkol}, Markus and {N{\"u}st}, Daniel and {Goulier}, Laura}, + title = "{Publishing computational research -- A review of infrastructures for reproducible and transparent scholarly communication}", + journal = {arXiv}, + year = 2020, + month = jan, + pages 
= {2001.00484}, +archivePrefix = {arXiv}, + eprint = {2001.00484}, + primaryClass = {cs.DL}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200100484K}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{infante20, + author = {{Infante-Sainz}, Ra{\'u}l and {Trujillo}, Ignacio and + {Rom{\'a}n}, Javier}, + title = "{The Sloan Digital Sky Survey extended point spread functions}", + journal = {Monthly Notices of the Royal Astronomical Society}, + keywords = {instrumentation: detectors, methods: data analysis, techniques: image processing, techniques: photometric, galaxies: haloes, Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies}, + year = "2020", + month = "Feb", + volume = {491}, + number = {4}, + pages = {5317}, + doi = {10.1093/mnras/stz3111}, +archivePrefix = {arXiv}, + eprint = {1911.01430}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.491.5317I}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{gibney20, + author = {Elizabeth Gibney}, + title = {This AI researcher is trying to ward off a reproducibility crisis}, + year = {2020}, + journal = {Nature}, + volume = {577}, + pages = {14}, + doi = {10.1038/d41586-019-03895-5}, +} + + + + + +@ARTICLE{pimentel19, + author = {{Jo\~ao Felipe} Pimentel and Leonardo Murta and Vanessa Braganholo and Juliana Freire}, + title = {A large-scale study about quality and reproducibility of jupyter notebooks}, + year = {2019}, + journal = {Proceedings of the 16th International Conference on Mining Software Repositories}, + volume = {1}, + pages = {507}, + doi = {10.1109/MSR.2019.00077}, +} + + + + + +@ARTICLE{miksa19a, + author = {Tomasz Miksa and Paul Walk and Peter Neish}, + title = {RDA DMP Common Standard for Machine-actionable Data Management Plans}, + year = {2019}, + journal = {RDA}, + pages = {doi:10.15497/rda00039}, + doi = {10.15497/rda00039}, +} + 
+ + + + +@ARTICLE{miksa19b, + author = {Tomasz Miksa and Stephanie Simms and Daniel Mietchen and Sarah Jones}, + title = {Ten principles for machine-actionable data management plans}, + year = {2019}, + journal = {PLoS Computational Biology}, + volume = {15}, + pages = {e1006750}, + doi = {10.1371/journal.pcbi.1006750}, +} + + + + + +@ARTICLE{dicosmo19, + author = {Roberto {Di Cosmo} and Francois Pellegrini}, + title = {Encouraging a wider usage of software derived from research}, + year = {2019}, + journal = {\doihref{https://www.ouvrirlascience.fr/wp-content/uploads/2020/02/Opportunity-Note_software-derived-from-research_EN.pdf}{Ouvrir la science}}, + volume = {}, + pages = {}, + doi = {}, +} + + + + + +@ARTICLE{perignon19, + author = {Christophe P\'erignon and Kamel Gadouche and Christophe Hurlin and Roxane Silberman and Eric Debonnel}, + title = {Certify reproducibility with confidential data}, + year = {2019}, + journal = {Science}, + volume = {365}, + pages = {127}, + doi = {10.1126/science.aaw2825}, +} + + + + + +@ARTICLE{munafo19, + author = {Marcus Munaf\'o}, + title = {Raising research quality will require collective action}, + year = {2019}, + journal = {Nature}, + volume = {576}, + pages = {183}, + doi = {10.1038/d41586-019-03750-7}, +} + + + + + +@ARTICLE{jones19, + author = {{Jones}, M.~G. and {Verdes-Montenegro}, L. and {Damas-Segovia}, A. and + {Borthakur}, S. and {Yun}, M. and {del Olmo}, A. and {Perea}, J. and + {Rom{\'a}n}, J. and {Luna}, S. and {Lopez Gutierrez}, D. and + {Williams}, B. and {Vogt}, F.~P.~A. and {Garrido}, J. and + {Sanchez}, S. and {Cannon}, J. and {Ram{\'\i}rez-Moreta}, P.}, + title = "{Evolution of compact groups from intermediate to final stages. 
A case study of the H I content of HCG 16}", + journal = {Astronomy \& Astrophysics}, + eprint = {1910.03420}, + keywords = {galaxies: groups: individual: HCG 16, galaxies: interactions, galaxies: evolution, galaxies: ISM, radio lines: galaxies}, + year = "2019", + month = "Dec", + volume = {632}, + eid = {A78}, + pages = {A78}, + doi = {10.1051/0004-6361/201936349}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019A&A...632A..78J}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{banek19, + author = {{Banek}, Christine and {Thornton}, Adam and {Economou}, Frossie and + {Fausti}, Angelo and {Krughoff}, K. Simon and {Sick}, Jonathan}, + title = "{Why is the LSST Science Platform built on Kubernetes?}", + journal = {Proceedings of ADASS XXIX}, + volume = {arXiv}, + keywords = {Astrophysics - Instrumentation and Methods for Astrophysics}, + year = "2019", + month = "Nov", + eid = {arXiv:1911.06404}, + pages = {1911.06404}, +archivePrefix = {arXiv}, + eprint = {1911.06404}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv191106404B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{fineberg19, + author = {Harvey V. Fineberg and David B. Allison and Lorena A. Barba and Dianne Chong and David L. Donoho and Juliana Freire and Gerald Gabrielse and Constantine Gatsonis and Edward Hall and Thomas H. Jordan and Dietram A. Scheufele and Victoria Stodden and Simine Vazire and Timothy D. Wilson and Wendy Wood and Jennifer Heimberg and Thomas Arrison and Michael Cohen and Michele Schwalbe and Adrienne Stith Butler and Barbara A. 
Wanchisen and Tina Winters and Rebecca Morgan and Thelma Cox and Lesley Webb and Garret Tyson and Erin Hammers Forstag}, + title = {Reproducibility and Replicability in Science}, + journal = {The National Academies Press}, + year = 2019, + pages = {1}, + doi = {10.17226/25303}, +} + + + + + +@ARTICLE{akhlaghi19, + author = {{Akhlaghi}, Mohammad}, + title = "{Carving out the low surface brightness universe with NoiseChisel}", + journal = {IAU Symposium 355}, + volume = {arXiv}, + keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies, Computer Science - Computer Vision and Pattern Recognition}, + year = "2019", + month = "Sep", + eid = {arXiv:1909.11230}, + pages = {1909.11230}, +archivePrefix = {arXiv}, + eprint = {1909.11230}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190911230A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{cribbs19, + author = {Cribbs, AP and Luna-Valero, S and George, C and Sudbery, IM and Berlanga-Taylor, AJ and Sansom, SN and Smith, T and Ilott, NE and Johnson, J and Scaber, J and Brown, K and Sims, D and Heger, A}, + title = {CGAT-core: a python framework for building scalable, reproducible computational biology workflows [version 2; peer review: 1 approved, 1 approved with reservations]}, + journal = {F1000Research}, + year = 2019, + volume = 8, + pages = {377}, + doi = {10.12688/f1000research.18674.2}, +} + + + + + +@ARTICLE{brinckman19, +author = "Adam Brinckman and Kyle Chard and Niall Gaffney and Mihael Hategan and Matthew B. Jones and Kacper Kowalik and Sivakumar Kulasekaran and Bertram Ludäscher and Bryce D. Mecum and Jarek Nabrzyski and Victoria Stodden and Ian J. Taylor and Matthew J. 
Turk and Kandace Turner", + title = {Computing environments for reproducibility: Capturing the ``Whole Tale''}, + journal = {Future Generation Computer Systems}, + year = 2019, + volume = 94, + pages = 854, + doi = {10.1016/j.future.2017.12.029}, +} + + + + + +@ARTICLE{uhse19, + author = {Uhse, Simon and Pflug, Florian G. and {von Haeseler}, Arndt and Djamei, Armin}, + title = {Insertion Pool Sequencing for Insertional Mutant Analysis in Complex Host-Microbe Interactions}, + journal = {Current Protocols in Plant Biology}, + volume = {4}, + year = "2019", + month = "July", + pages = {e20097}, + doi = {10.1002/cppb.20097}, +} + + + + + +@ARTICLE{alliez19, + author = {{Alliez}, Pierre and {Di Cosmo}, Roberto and {Guedj}, Benjamin and + {Girault}, Alain and {Hacid}, Mohand-Said and {Legrand}, Arnaud and + {Rougier}, Nicolas P.}, + title = "{Attributing and Referencing (Research) Software: Best Practices and Outlook from Inria}", + journal = {Computing in Science \& Engineering}, + volume = {22}, + keywords = {Computer Science - Digital Libraries, Computer Science - Software Engineering}, + year = "2019", + month = "May", + pages = {39}, +archivePrefix = {arXiv}, + eprint = {1905.11123}, + primaryClass = {cs.DL}, + doi = {10.1109/MCSE.2019.2949413}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190511123A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{kneller19, + author = {Kneller,Gerald R. 
and Hinsen,Konrad}, + title = {Memory effects in a random walk description of protein structure ensembles}, + journal = {The Journal of Chemical Physics}, + volume = {150}, + year = {2019}, + pages = {064911}, + doi = {10.1063/1.5054887}, +} + + + + + +@article{tange18, + author = {Tange, Ole}, + title = {GNU Parallel 2018}, + Journal = {Zenodo}, + volume = {1146014}, + pages = {\href{https://doi.org/10.5281/zenodo.1146014}{DOI:10.5281/zenodo.1146014}}, + year = 2018, + ISBN = {9781387509881}, + doi = {10.5281/zenodo.1146014}, + url = {https://doi.org/10.5281/zenodo.1146014} +} + + + + + +@ARTICLE{rule18, + author = {Adam Rule and Aur\'elien Tabard and {James D.} Hollan}, + title = {Exploration and Explanation in Computational Notebooks}, + journal = {Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems}, + volume = {1}, + year = {2018}, + pages = {30}, + doi = {10.1145/3173574.3173606}, +} + + + + + +@ARTICLE{plesser18, + author = {Hans E. Plesser}, + title = {Reproducibility vs. Replicability: A Brief History of a Confused Terminology}, + journal = {Frontiers in Neuroinformatics}, + volume = {11}, + year = {2018}, + pages = {76}, + doi = {10.3389/fninf.2017.00076}, +} + + + + + +@ARTICLE{zhang18, + author = {{Zhang}, Zhi-Yu and {Romano}, D. and {Ivison}, R.~J. and + {Papadopoulos}, Padelis P. 
and {Matteucci}, F.}, + title = "{Stellar populations dominated by massive stars in dusty starburst galaxies across cosmic time}", + journal = {Nature}, + keywords = {Astrophysics - Astrophysics of Galaxies}, + year = "2018", + month = "Jun", + volume = {558}, + number = {7709}, + pages = {260}, + doi = {10.1038/s41586-018-0196-x}, +archivePrefix = {arXiv}, + eprint = {1806.01280}, + primaryClass = {astro-ph.GA}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2018Natur.558..260Z}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{smart18, + author = {{Smart}, A.G.}, + title = {The war over supercooled water}, + journal = {Physics Today}, + volume = {Aug}, + year = "2018", + pages = {DOI:10.1063/PT.6.1.20180822a}, + doi = {10.1063/PT.6.1.20180822a}, +} + + + + + +@ARTICLE{kaiser18, + author = {{Kaiser}, J.}, + title = {Plan to replicate 50 high-impact cancer papers shrinks to just 18}, + journal = {Science}, + volume = {Jul}, + year = "2018", + pages = {31}, + doi = {10.1126/science.aau9619}, +} + + + + + +@ARTICLE{dicosmo18, + author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, + title = {Identifiers for Digital Objects: The case of software source code preservation}, + journal = {Proceedings of iPRES 2018}, + year = "2018", + pages = {204.4}, + doi = {10.17605/osf.io/kde56}, +} + + + + + +@ARTICLE{gruning18, + author = {Gr\"uning, Bj\"orn and Chilton, John and K\"oster, Johannes and Dale, Ryan and Soranzo, Nicola and {van den Beek}, Marius and Goecks, Jeremy and Backofen, Rolf and Nekrutenko, Anton and Taylor, James}, + title = {Practical Computational Reproducibility in the Life Sciences}, + journal = {Cell Systems}, + volume = 6, + year = "2018", + pages = {631. bioRxiv:\href{https://www.biorxiv.org/content/10.1101/200683v2}{200683}}, + doi = {10.1016/j.cels.2018.03.014}, +} + + + + + +@ARTICLE{allen18, + author = {{Allen}, Alice and {Teuben}, Peter J. and {Ryan}, P. 
Wesley}, + title = "{Schroedinger's Code: A Preliminary Study on Research Source Code Availability and Link Persistence in Astrophysics}", + journal = {The Astrophysical Journal Supplement Series}, + keywords = {methods: numerical, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Digital Libraries}, + year = "2018", + month = "May", + volume = {236}, + number = {1}, + eid = {10}, + pages = {10}, + doi = {10.3847/1538-4365/aab764}, +archivePrefix = {arXiv}, + eprint = {1801.02094}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2018ApJS..236...10A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{burrell18, + author = {{Burrell}, A.G. and {Halford}, A. and {Klenzing}, J. and {Stoneback}, R.A. and {Morley}, S.K. and {Annex}, A.M. and {Laundal}, K.M. and {Kellerman}, A.C. and {Stansby}, D. and {Ma}, J.}, + title = {Snakes on a Spaceship—An Overview of Python in Heliophysics}, + journal = {Journal of Geophysical Research: Space Physics}, + volume = {123}, + year = "2018", + pages = {384}, + doi = {10.1029/2018JA025877}, +} + + + + + +@article{stodden18, + author = {{Stodden}, V. and {Seiler}, J. 
and {Ma}, Z.}, + title = {An empirical analysis of journal policy effectiveness for computational reproducibility}, + volume = {115}, + number = {11}, + pages = {2584}, + year = {2018}, + doi = {10.1073/pnas.1708290115}, + issn = {0027-8424}, + URL = {https://www.pnas.org/content/115/11/2584}, + journal = {Proceedings of the National Academy of Sciences} +} + + + + + +@article {fanelli18, + author = {{Fanelli}, D.}, + title = {Opinion: Is science really facing a reproducibility crisis, and do we need it to?}, + volume = {115}, + number = {11}, + pages = {2628}, + year = {2018}, + doi = {10.1073/pnas.1708272114}, + publisher = {National Academy of Sciences}, + issn = {0027-8424}, + URL = {https://www.pnas.org/content/115/11/2628}, + journal = {Proceedings of the National Academy of Sciences} +} + + + + + + +@ARTICLE{lewis18, + author = {{Lewis}, L.M. and {Edwards}, M.C. and {Meyers}, Z.R. and {Conover Talbot}, C. and {Hao}, H. and {Blum}, D. }, + title = "{Replication Study: Transcriptional amplification in tumor cells with elevated c-Myc}", + journal = {eLife}, + volume = {7}, + year = "2018", + month = "January", + pages = {e30274}, + doi = {10.7554/eLife.30274}, +} + + + + + +@ARTICLE{akhlaghi18b, + author = {{Akhlaghi}, Mohammad and {Bacon}, Roland and {Inami}, Hanae}, + title = "{MUSE HUDF survey I \& II, Sections 7.3 \& 3.4: photometry for objects with no prior broad-band segmentation map}", + journal = {Zenodo}, + pages = {DOI:10.5281/zenodo.1164774}, + year = "2018", + month = "February", + doi = {10.5281/zenodo.1164774}, +} + + + + + +@ARTICLE{akhlaghi18a, + author = {{Akhlaghi}, Mohammad and {Bacon}, Roland}, + title = "{MUSE HUDF survey I, Section 4: data and reproduction pipeline for photometry and astrometry}", + journal = {Zenodo}, + pages = {DOI:10.5281/zenodo.1163746}, + year = "2018", + month = "January", + doi = {10.5281/zenodo.1163746}, +} + + + + + +@ARTICLE{leek17, + author = {Jeffrey T. Leek and Leah R. 
Jager}, + title = {Is Most Published Research Really False?}, + journal = {Annual Review of Statistics and Its Application}, + volume = {4}, + year = {2017}, + pages = {109}, + doi = {10.1146/annurev-statistics-060116-054104}, +} + + + + + +@ARTICLE{reich17, + author = {Michael Reich and Thorin Tabor and Ted Liefeld and Helga Thorvaldsdóttir and Barbara Hill and Pablo Tamayo and Jill P. Mesirov}, + title = {The GenePattern Notebook Environment}, + journal = {Cell Systems}, + year = {2017}, + volume = {5}, + pages = {149}, + doi = {10.1016/j.cels.2017.07.003}, +} + + + + + +@ARTICLE{becker17, + author = {Gabriel Becker and Cory Barr and Robert Gentleman and Michael Lawrence}, + title = {Enhancing Reproducibility and Collaboration via Management of R Package Cohorts}, + journal = {Journal of Statistical Software, Articles}, + volume = {82}, + pages = 1, + year = "2017", +archivePrefix = {arXiv}, + eprint = {1501.02284}, + doi = {10.18637/jss.v082.i01}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150102284B}, +} + + + + + +@ARTICLE{jenness17, + author = {{Jenness}, Tim}, + title = "{Modern Python at the Large Synoptic Survey Telescope}", + journal = {ADASS 27}, + year = "2017", + month = "Dec", + eid = {arXiv:1712.00461}, + pages = {arXiv:1712.00461}, +archivePrefix = {arXiv}, + eprint = {1712.00461}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2017arXiv171200461J}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@article{bezanson17, + title={Julia: A fresh approach to numerical computing}, + author={Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B}, + journal={SIAM {R}eview}, + volume={59}, + number={1}, + pages={65}, + year={2017}, + archivePrefix={arXiv}, + eprint={1411.1607}, + publisher={SIAM}, + doi={10.1137/141000671}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2014arXiv1411.1607B}, +} + + + + + +@ARTICLE{melson17, + author = {{Melsen}, L.A. 
and {Torfs}, P.J.J.F and {Uijlenhoet}, R. and {Teuling}, A.J.}, + title = {Comment on “Most computational hydrology is not reproducible, so is it really science?” by Christopher Hutton et al.}, + journal = {Water Resources Research}, + volume = 53, + pages = {2568}, + year = {2017}, + doi = {10.1002/2016WR020208}, +} + + + + + +@ARTICLE{munafo17, + author = {{Munaf\'o}, M.R. and {Nosek}, B.A. and {Bishop}, D.V.M. and {Button}, K.S. and {Chambers}, C.D. and {Percie du Sert}, N. and {Simonsohn}, U. and {Wagenmakers}, E.J. and {Ware}, J.J. and {Ioannidis}, J.P.A.}, + title = {A manifesto for reproducible science}, + journal = {Nature Human Behaviour}, + volume = 1, + pages = {21}, + year = {2017}, + doi = {10.1038/s41562-016-0021}, +} + + + + + +@ARTICLE{jimenez17, + title={The popper convention: Making reproducible systems evaluation practical}, + author = {{Jimenez}, I. and {Sevilla}, M. and {Watkins}, N. and {Maltzahn}, C. and {Lofstead}, J. and {Mohror}, K. and {Arpaci-Dusseau}, A. and {Arpaci-Dusseau}, R.}, + journal = {IEEE IPDPSW}, + pages = {1561}, + year = {2017}, + doi = {10.1109/IPDPSW.2017.157}, +} + + + + + +@ARTICLE{bacon17, + author = {{Bacon}, Roland and {Conseil}, Simon and {Mary}, David and + {Brinchmann}, Jarle and {Shepherd}, Martin and {Akhlaghi}, Mohammad and + {Weilbacher}, Peter M. and {Piqueras}, Laure and {Wisotzki}, Lutz and + {Lagattuta}, David and {Epinat}, Benoit and {Guerou}, Adrien and + {Inami}, Hanae and {Cantalupo}, Sebastiano and + {Courbot}, Jean Baptiste and {Contini}, Thierry and {Richard}, Johan and + {Maseda}, Michael and {Bouwens}, Rychard and {Bouch{\'e}}, Nicolas and + {Kollatschny}, Wolfram and {Schaye}, Joop and {Marino}, Raffaella Anna and + {Pello}, Roser and {Herenz}, Christian and {Guiderdoni}, Bruno and + {Carollo}, Marcella}, + title = "{The MUSE Hubble Ultra Deep Field Survey. I. 
Survey description, data reduction, and source detection}", + journal = {Astronomy \& Astrophysics}, + keywords = {galaxies: distances and redshifts, galaxies: high-redshift, cosmology: observations, methods: data analysis, techniques: imaging spectroscopy, galaxies: formation, Astrophysics - Astrophysics of Galaxies}, + year = "2017", + month = "Nov", + volume = {608}, + eid = {A1}, + pages = {A1}, + doi = {10.1051/0004-6361/201730833}, +archivePrefix = {arXiv}, + eprint = {1710.03002}, + primaryClass = {astro-ph.GA}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2017A\&A...608A...1B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{austin17, + author = {{Claire C.} Austin and Theodora Bloom and Sünje Dallmeier-Tiessen and {Varsha K.} Khodiyar and Fiona Murphy and Amy Nurnberger and Lisa Raymond and Martina Stockhause and Jonathan Tedds and Mary Vardigan and Angus Whyte}, + title = {Key components of data publishing: using current best practices to develop a reference model for data publishing}, + journal = {International Journal on Digital Libraries}, + volume = {18}, + year = {2017}, + pages = {77}, + doi = {10.1007/s00799-016-0178-2}, +} + + + + + +@ARTICLE{smith16, + author = {Arfon M. Smith and Daniel S. Katz and Kyle E. 
Niemeyer}, + title = {Software citation principles}, + journal = {PeerJ Computer Science}, + volume = {2}, + year = {2016}, + pages = {e86}, + doi = {10.7717/peerj-cs.86}, +} + + + + + +@ARTICLE{ziemann16, + author = {Mark Ziemann and Yotam Eren and Assam El-Osta}, + title = {Gene name errors are widespread in the scientific literature}, + journal = {Genome Biology}, + volume = {17}, + year = {2016}, + pages = {177}, + doi = {10.1186/s13059-016-1044-7}, +} + + + + + +@ARTICLE{hinsen16, + author = {Konrad Hinsen}, + title = {Scientific notations for the digital era}, + journal = {The Self Journal of Science}, + year = {2016}, + pages = {1: arXiv:\href{https://arxiv.org/abs/1605.02960}{1605.02960}}, +} + + + + + +@ARTICLE{kluyver16, + author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing}, + title = "{Jupyter Notebooks – a publishing format for reproducible computational workflows}", + journal = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, + year = {2016}, + pages = {87}, + doi = {10.3233/978-1-61499-649-1-87}, +} + + + + + +@ARTICLE{baker16, + author = {{Baker}, M.}, + title = "{Is there a reproducibility crisis?}", + journal = {Nature}, + volume = {533}, + year = "2016", + month = "May", + pages = {452}, + doi = {10.1038/533452a}, +} + + + + + +@ARTICLE{wilkinson16, + author = { {Wilkinson}, M.D and {Dumontier}, M. and {Aalbersberg}, I.J. and {Appleton}, G. and {Axton}, M. and {Baak}, A. and {Blomberg}, N. and {Boiten}, J. and {da Silva Santos}, L.B and {Bourne}, P.E. and {Bouwman}, J. and {Brookes}, A.J. and {Clark}, T. and {Crosas}, M. and {Dillo}, I. and {Dumon}, O. and {Edmunds}, S. and {Evelo}, C. and {Finkers}, R. and {Gonzalez-Beltran}, A. and {Gray}, A.J.G. and {Groth}, P. and {Goble}, C. 
and {Grethe}, Jeffrey S. and {Heringa}, J. and {’t Hoen}, P.A.C and {Hooft}, R. and {Kuhn}, T. and {Kok}, R. and {Kok}, J. and {Lusher}, S. and {Martone}, M. and {Mons}, A. and {Packer}, A. and {Persson}, B. and {Rocca-Serra}, P. and {Roos}, M. and {van Schaik}, R. and {Sansone}, S. and {Schultes}, E. and {Sengstag}, T. and {Slater}, T. and {Strawn}, G. and {Swertz}, M. and {Thompson}, M. and {van der Lei}, J. and {van Mulligen}, E. and {Velterop}, J. and {Waagmeester}, A. and {Wittenburg}, P. and {Wolstencroft}, K. and {Zhao}, J. and {Mons}, B.}, + title = "{The FAIR Guiding Principles for scientific data management and stewardship}", + journal = {Scientific Data}, + year = 2016, + month = mar, + volume = 3, + pages = {160018}, + doi = {10.1038/sdata.2016.18}, +} + + + + +@ARTICLE{hutton16, + author = {{Hutton}, C. and {Wagener}, T. and {Freer}, J. and {Han}, D. and {Duffy}, C. and {Arheimer}, B.}, + title = {Most computational hydrology is not reproducible, so is it really science?}, + journal = {Water Resources Research}, + year = {2016}, + volume = 52, + pages = {7548}, + doi = {10.1002/2016WR019285}, +} + + + + + +@ARTICLE{topalidou16, + author = {{Topalidou}, M. and {Leblois}, A. and {Boraud}, T. and {Rougier}, N.P.}, + title = {A long journey into reproducible computational neuroscience}, + journal = {Frontiers in Computational Neuroscience}, + year = {2016}, + volume = 9, + pages = {30}, + doi = {10.3389/fncom.2015.00030}, +} + + + + + +@ARTICLE{gil16, + author = {{Gil}, Yolanda and {David}, C.H. and {Demir}, I. and {Essawy}, B.T. and {Fulweiler}, R.W. and {Goodall}, J.L. and {Karlstrom}, L. and {Lee}, H. and {Mills}, H.J. and {Oh}, J. and {Pierce}, S.A. and {Pope}, A. and {Tzeng}, M.W. and {Villamizar}, S.R. 
and {Yu}, X}, + title = {Toward the Geoscience Paper of the Future: Best practices for documenting and sharing research from data to software to provenance}, + journal = {Earth and Space Science}, + year = 2016, + volume = 3, + pages = {388}, + doi = {10.1002/2015EA000136}, +} + + + + + +@ARTICLE{romine15, + author = {Charles H. Romine}, + title = {Secure Hash Standard (SHS)}, + journal = {Federal Information processing standards publication}, + volume = {180}, + pages = {4}, + year = {2015}, + doi = {10.6028/NIST.FIPS.180-4}, +} + + + + + +@ARTICLE{horvath15, + author = {Steve Horvath}, + title = {Erratum to: DNA methylation age of human tissues and cell types}, + journal = {Genome Biology}, + volume = {16}, + pages = {96}, + year = {2015}, + doi = {10.1186/s13059-015-0649-6}, +} + + + + + +@ARTICLE{chang15, + author = {Andrew C. Chang and Phillip Li}, + title = {Is Economics Research Replicable? Sixty Published Papers from Thirteen Journals Say ``Usually Not''}, + journal = {Finance and Economics Discussion Series 2015-083}, + year = {2015}, + pages = {1}, + doi = {10.17016/FEDS.2015.083}, +} + + + + + +@ARTICLE{schaffer15, + author = {Jonathan Schaffer}, + title = {What Not to Multiply Without Necessity}, + journal = {Australasian Journal of Philosophy}, + volume = {93}, + pages = {644}, + year = {2015}, + doi = {10.1080/00048402.2014.992447}, +} + + + + + +@ARTICLE{clarkso15, + author = "Chris Clarkson and Mike Smith and Ben Marwick and Richard Fullagar and Lynley A. Wallis and Patrick Faulkner and Tiina Manne and Elspeth Hayes and Richard G. Roberts and Zenobia Jacobs and Xavier Carah and Kelsey M. Lowe and Jacqueline Matthews and S. 
Anna Florin", + title = {The archaeology, chronology and stratigraphy of Madjedbebe (Malakunanja II): A site in northern Australia with early occupation}, + journal = {Journal of Human Evolution}, + year = 2015, + volume = 83, + pages = 46, + doi = {10.1016/j.jhevol.2015.03.014}, +} + + + + + +@ARTICLE{meng15, + author = {Haiyan Meng and Rupa Kommineni and Quan Pham and Robert Gardner and Tanu Malik and Douglas Thain}, + title = {An invariant framework for conducting reproducible computational science}, + journal = {Journal of Computational Science}, + year = 2015, + volume = 9, + pages = 137, + doi = {10.1016/j.jocs.2015.04.012}, +} + + + + + +@ARTICLE{gamblin15, + author = {Gamblin, Todd and LeGendre, Matthew and Collette, Michael R. and Lee, Gregory L. and Moody, Adam and {de Supinski}, Bronis R. and Futral, Scott}, + title = {The Spack package manager: bringing order to HPC software chaos}, + journal = {IEEE SC15}, + year = 2015, + volume = 1, + pages = {1}, + doi = {10.1145/2807591.2807623}, +} + + + + +@ARTICLE{akhlaghi15, + author = {{Akhlaghi}, M. 
and {Ichikawa}, T.}, + title = "{Noise-based Detection and Segmentation of Nebulous Objects}", + journal = {The Astrophysical Journal Supplement Series}, + archivePrefix = "arXiv", + eprint = {1505.01664}, + primaryClass = "astro-ph.IM", + keywords = {galaxies: irregular, galaxies: photometry, galaxies: structure, methods: data analysis, techniques: image processing, techniques: photometric}, + year = 2015, + month = sep, + volume = 220, + eid = {1}, + pages = {1}, + doi = {10.1088/0067-0049/220/1/1}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015ApJS..220....1A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{courtes15, + author = {{Court{\'e}s}, Ludovic and {Wurmus}, Ricardo}, + title = {Reproducible and User-Controlled Software Environments in HPC with Guix}, + journal = {Euro-Par}, + volume = {9523}, + keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Operating Systems, Computer Science - Software Engineering}, + year = {2015}, + month = {Jun}, + eid = {arXiv:1506.02822}, + pages = {arXiv:1506.02822}, +archivePrefix = {arXiv}, + eprint = {1506.02822}, + primaryClass = {cs.DC}, + doi = {10.1007/978-3-319-27308-2_47}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150602822C}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{hinsen15, + author = {{Hinsen}, K.}, + title = {ActivePapers: a platform for publishing and archiving computer-aided research}, + journal = {F1000Research}, + year = 2015, + volume = 3, + pages = {289}, + doi = {10.12688/f1000research.5773.3}, +} + + + + + +@ARTICLE{belhajjame15, + author = {{Belhajjame}, K. and {Zhao}, Z. and {Garijo}, D. and {Gamble}, M. and {Hettne}, K. and {Palma}, R. and {Mina}, E. and {Corcho}, O. and {Gómez-Pérez}, J.M. and {Bechhofer}, S. and {Klyne}, G. 
and {Goble}, C}, + title = "{Using a suite of ontologies for preserving workflow-centric research objects}", + journal = {Journal of Web Semantics}, + year = 2015, + volume = 32, + pages = {16}, + doi = {10.1016/j.websem.2015.01.003}, +} + + + + + +@ARTICLE{bechhofer13, + author = {{Bechhofer}, S. and {Buchan}, I. and {De Roure}, D. and {Missier}, P. and {Ainsworth}, J. and {Bhagat}, J. and Couch, P. and Cruickshank, D. and {Delderfield}, M and Dunlop, I. and {Gamble}, M. and {Michaelides}, D. and {Owen}, S. and {Newman}, D. and {Sufi}, S. and {Goble}, C}, + title = "{Why linked data is not enough for scientists}", + journal = {Future Generation Computer Systems}, + year = 2013, + volume = 29, + pages = {599}, + doi = {10.1016/j.future.2011.08.004}, +} + + + + + +@ARTICLE{peng15, + author = {{Peng}, R.D.}, + title = {The reproducibility crisis in science: A statistical counterattack}, + journal = {Significance}, + year = 2015, + month = jun, + volume = 12, + pages = {30}, + doi = {10.1111/j.1740-9713.2015.00827.x}, +} + + + + + +@ARTICLE{katz14, + author = {Daniel S. Katz}, + title = {Transitive Credit as a Means to Address Social and Technological Concerns Stemming from Citation and Attribution of Digital Products}, + journal = {Journal of Open Research Software}, + year = {2014}, + volume = {2}, + pages = {e20}, + doi = {10.5334/jors.be}, +} + + + + + +@ARTICLE{herndon14, + author = {Thomas Herndon and Michael Ash and Robert Pollin}, + title = {Does high public debt consistently stifle economic growth? 
A critique of Reinhart and Rogoff}, + journal = {Cambridge Journal of Economics}, + year = {2014}, + month = {dec}, + volume = {38}, + pages = {257}, + doi = {10.1093/cje/bet075}, +} + + + + + +@ARTICLE{easterbrook14, + author = {{Easterbrook}, S.}, + title = {Open code for open science?}, + journal = {Nature Geoscience}, + year = 2014, + month = oct, + volume = 7, + pages = {779}, + doi = {10.1038/ngeo2283}, +} + + + + + +@ARTICLE{fomel13, + author = {Sergey Fomel and Paul Sava and Ioan Vlad and Yang Liu and Vladimir Bashkardin}, + title = {Madagascar: open-source software project for multidimensional data analysis and reproducible computational experiments}, + journal = {Journal of Open Research Software}, + year = {2013}, + volume = {1}, + pages = {e8}, + doi = {10.5334/jors.ag}, +} + + + + + +@ARTICLE{sandve13, + author = {{Sandve}, G.K. and {Nekrutenko}, A. and {Taylor}, J. and {Hovig}, E.}, + title = {Ten Simple Rules for Reproducible Computational Research}, + journal = {PLoS Computational Biology}, + year = 2013, + month = oct, + volume = 9, + pages = {e1003285}, + doi = {10.1371/journal.pcbi.1003285}, +} + + + + + +@ARTICLE{malik13, + author = {Tanu Malik and Quan Pham and Ian Foster}, + title = {SOLE: Towards Descriptive and Interactive Publications}, + journal = {Implementing Reproducible Research}, + year = 2013, + volume = {Chapter 2}, + pages = {1. URL: \url{https://osf.io/ns2m3}}, +} + + + + + +@ARTICLE{gronenschild12, + author = {Ed H. B. M. Gronenschild and Petra Habets and Heidi I. L. 
Jacobs and Ron Mengelers and Nico Rozendaal and Jim van Os and Machteld Marcelis}, + title = {The Effects of FreeSurfer Version, Workstation Type, and Macintosh Operating System Version on Anatomical Volume and Cortical Thickness Measurements}, + journal = {PLoS ONE}, + volume = {7}, + year = {2012}, + pages = {e38234}, + doi = {10.1371/journal.pone.0038234}, +} + + + + + +@ARTICLE{pham12, + author = {Quan Pham and Tanu Malik and Ian Foster and Roberto {Di Lauro} and Raffaele Montella}, + title = {SOLE: Linking Research Papers with Science Objects}, + journal = {Provenance and Annotation of Data and Processes (IPAW)}, + year = {2012}, + pages = {203}, + doi = {10.1007/978-3-642-34222-6_16}, +} + + + + + +@ARTICLE{davison12, + author = {Andrew Davison}, + title = {Automated Capture of Experiment Context for Easier Reproducibility in Computational Research}, + journal = {Computing in Science \& Engineering}, + volume = {14}, + year = {2012}, + pages = {48}, + doi = {10.1109/MCSE.2012.41}, +} + + + + + +@ARTICLE{zhao12, + author = {Jun Zhao and Jose Manuel Gomez-Perez and Khalid Belhajjame and Graham Klyne and Esteban Garcia-Cuesta and Aleix Garrido and Kristina Hettne and Marco Roos and David {De Roure} and Carole Goble}, + title = {Why workflows break — Understanding and combating decay in Taverna workflows}, + journal = {IEEE 8th International Conference on E-Science}, + year = {2012}, + pages = {1}, + doi = {10.1109/eScience.2012.6404482}, +} + + + + +@ARTICLE{vangorp11, + author = {Pieter {Van Gorp} and Steffen Mazanek}, + title = {SHARE: a web portal for creating and sharing executable research}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {589}, + doi = {10.1016/j.procs.2011.04.062}, +} + + + + + +@ARTICLE{hinsen11, + author = {{Hinsen}, Konrad}, + title = {A data and code model for reproducible research and executable papers}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {579}, + doi = 
{10.1016/j.procs.2011.04.061}, +} + + + + + +@ARTICLE{limare11, + author = {Nicolas Limare and Jean-Michel Morel}, + title = {The IPOL Initiative: Publishing and Testing Algorithms on Line for +Reproducible Research in Image Processing}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {716}, + doi = {10.1016/j.procs.2011.04.075}, +} + + + + + +@ARTICLE{gavish11, + author = {Matan Gavish and David L. Donoho}, + title = {A Universal Identifier for Computational Results}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {637}, + doi = {10.1016/j.procs.2011.04.067}, +} + + + + +@ARTICLE{gabriel11, + author = {Ann Gabriel and Rebecca Capone}, + title = {Executable Paper Grand Challenge Workshop}, + journal = {Procedia Computer Science}, + volume = {4}, + year = {2011}, + pages = {577}, + doi = {10.1016/j.procs.2011.04.060}, +} + + + + + +@ARTICLE{nowakowski11, + author = {Piotr Nowakowski and Eryk Ciepiela and Daniel Har\k{e}\.{z}lak and Joanna Kocot and Marek Kasztelnik and Tomasz Barty\'nski and Jan Meizner and Grzegorz Dyk and Maciej Malawski}, + title = {The Collage Authoring Environment}, + journal = {Procedia Computer Science}, + volume = {4}, + year = {2011}, + pages = {608}, + doi = {10.1016/j.procs.2011.04.064}, +} + + + + + +@ARTICLE{peng11, + author = {{Peng}, R.D.}, + title = {Reproducible Research in Computational Science}, + journal = {Science}, + year = {2011}, + month = dec, + volume = 334, + pages = {1226}, + doi = {10.1126/science.1213847}, +} + + + + + +@ARTICLE{gil10, + author = {Yolanda Gil and Pedro A. 
González-Calero and Jihie Kim and Joshua Moody and Varun Ratnakar}, + title = {A semantic framework for automatic generation of computational workflows using distributed data and component catalogues}, + journal = {Journal of Experimental \& Theoretical Artificial Intelligence}, + year = {2010}, + volume = {23}, + pages = {389}, + doi = {10.1080/0952813X.2010.490962}, +} + + + + + +@ARTICLE{pence10, + author = {{Pence}, W.~D. and {Chiappetti}, L. and {Page}, C.~G. and {Shaw}, R.~A. and + {Stobie}, E.}, + title = "{Definition of the Flexible Image Transport System (FITS), version 3.0}", + journal = {Astronomy and Astrophysics}, + keywords = {instrumentation: miscellaneous, methods: miscellaneous, techniques: miscellaneous, reference systems, standards, astronomical databases: miscellaneous}, + year = "2010", + month = "Dec", + volume = {524}, + eid = {A42}, + pages = {A42}, + doi = {10.1051/0004-6361/201015362}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2010A\&A...524A..42P}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{goecks10, + author = {Jeremy Goecks and Anton Nekrutenko and James Taylor}, + title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences}, + journal = {Genome Biology}, + year = {2010}, + volume = {11}, + pages = {R86}, + doi = {10.1186/gb-2010-11-8-r86}, +} + + + + + +@ARTICLE{merali10, + author = {Zeeya Merali}, + title = {Computational science: ...Error}, + journal = {Nature}, + year = 2010, + volume = 467, + pages = {775}, + doi = {10.1038/467775a}, +} + + + + + +@ARTICLE{casadevall10, + author = {{Casadevall}, A. 
and {Fang}, F.C.}, + title = {Reproducible Science}, + journal = {Infection and Immunity}, + year = 2010, + volume = 78, + pages = {4972}, + doi = {10.1128/IAI.00908-10}, +} + + + + + +@ARTICLE{mesirov10, + author = {{Mesirov}, J.P.}, + title = {Accessible Reproducible Research}, + journal = {Science}, + year = 2010, + volume = 327, + pages = {415}, + doi = {10.1126/science.1179653}, +} + + + + + +@ARTICLE{cheney09, + author = {James Cheney and Laura Chiticariu and Wang-Chiew Tan}, + title = {Provenance in Databases: Why, How, and Where}, + journal = {Foundations and Trends in Databases}, + year = {2009}, + volume = {1}, + pages = {379}, + doi = {10.1561/1900000006}, +} + + + + + +@ARTICLE{ioannidis2009, + author = {John P. A. Ioannidis and David B. Allison and Catherine A. Ball and Issa Coulibaly and Xiangqin Cui and Aedín C Culhane and Mario Falchi and Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion and Tapan Mehta and Michael Nitzberg and Grier P. Page and Enrico Petretto and Vera {van Noort}}, + title = {Repeatability of published microarray gene expression analyses}, + journal = {Nature Genetics}, + year = {2009}, + volume = {41}, + pages = {149}, + doi = {10.1038/ng.295}, +} + + + + + +@ARTICLE{fomel09, + author = {Sergey Fomel and Jon F. Claerbout}, + title = {Reproducible Research}, + journal = {Computing in Science \& Engineering}, + year = {2009}, + volume = {11}, + pages = {5}, + doi = {10.1109/MCSE.2009.14}, +} + + + + + +@ARTICLE{baggerly09, + author = {Keith A. 
Baggerly and Kevin R Coombes}, + title = {Deriving chemosensitivity from cell lines: Forensic bioinformatics and reproducible research in high-throughput biology}, + journal = {The Annals of Applied Statistics}, + year = {2009}, + volume = {3}, + pages = {1309}, + doi = {10.1214/09-AOAS291}, +} + + + + + +@ARTICLE{scheidegger08, + author = {Carlos Scheidegger and David Koop and Emanuele Santos and Huy Vo and Steven Callahan and Juliana Freire and Cláudio Silva}, + title = {Tackling the Provenance Challenge one layer at a time}, + journal = {Concurrency Computation: Practice and Experiment}, + year = {2008}, + volume = {20}, + pages = {473}, + doi = {10.1002/cpe.1237}, +} + + + + + +@ARTICLE{moreau08, + author = {Moreau, Luc and Ludäscher, Bertram and Altintas, Ilkay and Barga, Roger S. and Bowers, Shawn and Callahan, Steven and Chin JR., George and Clifford, Ben and Cohen, Shirley and Cohen-Boulakia, Sarah and Davidson, Susan and Deelman, Ewa and Digiampietri, Luciano and Foster, Ian and Freire, Juliana and Frew, James and Futrelle, Joe and Gibson, Tara and Gil, Yolanda and Goble, Carole and Golbeck, Jennifer and Groth, Paul and Holland, David A. and Jiang, Sheng and Kim, Jihie and Koop, David and Krenek, Ales and McPhillips, Timothy and Mehta, Gaurang and Miles, Simon and Metzger, Dominic and Munroe, Steve and Myers, Jim and Plale, Beth and Podhorszki, Norbert and Ratnakar, Varun and Santos, Emanuele and Scheidegger, Carlos and Schuchardt, Karen and Seltzer, Margo and Simmhan, Yogesh L. and Silva, Claudio and Slaughter, Peter and Stephan, Eric and Stevens, Robert and Turi, Daniele and Vo, Huy and Wilde, Mike and Zhao, Jun and Zhao, Yong}, + title = {The First Provenance Challenge}, + journal = {Concurrency Computation: Practice and Experiment}, + year = {2008}, + volume = {20}, + pages = {473}, + doi = {10.1002/cpe.1233}, +} + + + + + +@Article{matplotlib2007, + Author = {Hunter, J. 
D.}, + Title = {Matplotlib: A 2D graphics environment}, + Journal = {CiSE}, + Volume = {9}, + Number = {3}, + Pages = {90}, + abstract = {Matplotlib is a 2D graphics package used for Python + for application development, interactive scripting, and + publication-quality image generation across user + interfaces and operating systems.}, + publisher = {IEEE COMPUTER SOC}, + doi = {10.1109/MCSE.2007.55}, + year = 2007 +} + + + + + +@ARTICLE{witten2007, + author = {Ben Witten and Bill Curry and Jeff Shragge}, + title = {A New Build Environment for SEP}, + journal = {Stanford Exploration Project}, + year = {2007}, + volume = {129}, + pages = {247: \url{http://sepwww.stanford.edu/data/media/public/docs/sep129/ben1.pdf}}, +} + + + + + +@ARTICLE{miller06, + author = {Greg Miller}, + title = {A Scientist's Nightmare: Software Problem Leads to Five Retractions}, + journal = {Science}, + year = {2006}, + volume = {314}, + pages = {1856}, + doi = {10.1126/science.314.5807.1856}, +} + + + + + +@ARTICLE{reich06, + author = {Michael Reich and Ted Liefeld and Joshua Gould and Jim Lerner and Pablo Tamayo and Jill P Mesirov}, + title = {GenePattern 2.0}, + journal = {Nature Genetics}, + year = {2006}, + volume = {38}, + pages = {500}, + doi = {10.1038/ng0506-500}, +} + + + + + +@ARTICLE{ludascher05, + author = {Ludäs\-cher, Bertram and Altintas, Ilkay and Berkley, Chad and Higgins, Dan and Jaeger, Efrat and Jones, Matthew and Lee, Edward A. and Tao, Jing and Zhao, Yang}, + title = {Scientific workflow management and the Kepler system}, + journal = {Concurrency Computation: Practice and Experiment}, + year = {2006}, + volume = {18}, + pages = {1039}, + doi = {10.1002/cpe.994}, +} + + + + + +@ARTICLE{ioannidis05, + author = {John P. A. 
Ioannidis}, + title = {Why Most Published Research Findings Are False}, + journal = {PLoS Medicine }, + year = {2005}, + volume = {2}, + pages = {e124}, + doi = {10.1371/journal.pmed.0020124}, +} + + + + + +@ARTICLE{bavoil05, + author = {Louis Bavoil and Steven P. Callahan and Patricia J. Crossno and Juliana Freire and Carlos E. Scheidegger and Cláudio T. Silva and Huy T. Vo}, + title = {VisTrails: Enabling Interactive Multiple-View Visualizations}, + journal = {VIS 05. IEEE Visualization}, + year = {2005}, + volume = {}, + pages = {135}, + doi = {10.1109/VISUAL.2005.1532788}, +} + + + + + +@ARTICLE{dolstra04, + author = {{Dolstra}, Eelco and {de Jonge}, Merijn and {Visser}, Eelco}, + title = {Nix: A Safe and Policy-Free System for Software Deployment}, + journal = {Large Installation System Administration Conference}, + year = {2004}, + volume = {18}, + pages = {79. \url{https://www.usenix.org/legacy/events/lisa04/tech/full_papers/dolstra/dolstra.pdf}}, +} + + + + + +@ARTICLE{oinn04, + author = {Oinn, Tom and Addis, Matthew and Ferris, Justin and Marvin, Darren and Senger, Martin and Greenwood, Mark and Carver, Tim and Glover, Kevin and Pocock, Matthew R. and Wipat, Anil and Li, Peter}, + title = {Taverna: a tool for the composition and enactment of bioinformatics workflows}, + journal = {Bioinformatics}, + year = {2004}, + volume = {20}, + pages = {3045}, + doi = {10.1093/bioinformatics/bth361}, +} + + + + + +@ARTICLE{schwab2000, + author = {Matthias Schwab and Martin Karrenbach and Jon F. Claerbout}, + title = {Making scientific computations reproducible}, + journal = {Computing in Science \& Engineering}, + year = {2000}, + volume = {2}, + pages = {61}, + doi = {10.1109/5992.881708}, +} + + + + + +@ARTICLE{buckheit1995, + author = {Jonathan B. Buckheit and David L. 
Donoho}, + title = {WaveLab and Reproducible Research}, + journal = {Wavelets and Statistics}, + year = {1995}, + volume = {1}, + pages = {55}, + doi = {10.1007/978-1-4612-2544-7\_5}, +} + + + + + +@ARTICLE{claerbout1992, + author = {Jon F. Claerbout and Martin Karrenbach}, + title = {Electronic documents give reproducible research a new meaning}, + journal = {SEG Technical Program Expanded Abstracts}, + year = {1992}, + volume = {1}, + pages = {601}, + doi = {10.1190/1.1822162}, +} + + + + + +@ARTICLE{eker03, + author = {Johan Eker and Jorn W Janneck and Edward A. Lee and Jie Liu and Xiaojun Liu and Jozsef Ludvig and Sonia Sachs and Yuhong Xiong and Stephen Neuendorffer}, + title = {Taming heterogeneity - the Ptolemy approach}, + journal = {Proceedings of the IEEE}, + year = {2003}, + volume = {91}, + pages = {127}, + doi = {10.1109/JPROC.2002.805829}, +} + + + + + +@ARTICLE{stevens03, + author = {Robert Stevens and Kevin Glover and Chris Greenhalgh and Claire Jennings and Simon Pearce and Peter Li and Melena Radenkovic and Anil Wipat}, + title = {Performing in silico Experiments on the Grid: A Users Perspective}, + journal = {Proceedings of UK e-Science All Hands Meeting}, + year = {2003}, + pages = {43}, +} + + + + + +@ARTICLE{knuth84, + author = {Donald Knuth}, + title = {Literate Programming}, + journal = {The Computer Journal}, + year = {1984}, + volume = {27}, + pages = {97}, + doi = {10.1093/comjnl/27.2.97}, +} + + + + + +@ARTICLE{stallman88, + author = {Richard M. Stallman and Roland McGrath and Paul D. Smith}, + title = {GNU Make: a program for directing recompilation}, + journal = {Free Software Foundation}, + year = {1988}, + pages = {ISBN:1-882114-83-3. 
\url{https://www.gnu.org/s/make/manual/make.pdf}}, +} + + + + + +@ARTICLE{somogyi87, + author = {Zoltan Somogyi}, + title = {Cake: a fifth generation version of make}, + journal = {University of Melbourne}, + year = {1987}, + pages = {1: \url{https://pdfs.semanticscholar.org/3e97/3b5c9af7763d70cdfaabdd1b96b3b75b5483.pdf}}, +} + + + + + +@ARTICLE{feldman79, + author = {Stuart I. Feldman}, + title = {Make -- a program for maintaining computer programs}, + journal = {Journal of Software: Practice and Experience}, + volume = {9}, + pages = {255}, + year = {1979}, + doi = {10.1002/spe.4380090402}, +} + + + + + +@ARTICLE{mcilroy78, + author = {M. D. McIlroy and E. N. Pinson and B. A. Tague}, + title = {UNIX Time-Sharing System: Foreword}, + journal = {\doihref{https://archive.org/details/bstj57-6-1899/mode/2up}{Bell System Technical Journal}}, + year = {1978}, + volume = {57}, + pages = {6, ark:/13960/t0gt6xf72}, + doi = {}, +} + + + + + +@ARTICLE{anscombe73, + author = {{Anscombe}, F.J.}, + title = {Graphs in Statistical Analysis}, + journal = {The American Statistician}, + year = {1973}, + volume = {27}, + pages = {17}, + doi = {10.1080/00031305.1973.10478966}, +} + + + + + +@ARTICLE{roberts69, + author = {{Roberts}, K.V.}, + title = {The publication of scientific fortran programs}, + journal = {Computer Physics Communications}, + year = {1969}, + volume = {1}, + pages = {1}, + doi = {10.1016/0010-4655(69)90011-3}, +} diff --git a/tex/src/references.tex b/tex/src/references.tex deleted file mode 100644 index e19ec16..0000000 --- a/tex/src/references.tex +++ /dev/null @@ -1,1772 +0,0 @@ -@ARTICLE{clement19, - author = {Cl\'ement-Fontaine, M\'elanie and Di Cosmo, Roberto and Guerry, Bastien and MOREAU, Patrick and Pellegrini, Fran\c cois}, - title = {Encouraging a wider usage of software derived from research}, - year = {2019}, - journal = {Archives ouvertes HAL}, - volume = {}, - pages = {\href{https://hal.archives-ouvertes.fr/hal-02545142}{hal-02545142}}, -} - - - - - 
-@ARTICLE{dicosmo20, - author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, - title = "{Referencing Source Code Artifacts: a Separate Concern in Software Citation}", - journal = {Computing in Science \& Engineering}, - year = 2020, - volume = 22, - eid = {arXiv:2001.08647}, - pages = {33}, -archivePrefix = {arXiv}, - eprint = {2001.08647}, - primaryClass = {cs.DL}, - doi = {10.1109/MCSE.2019.2963148}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200108647D}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{menke20, - author = {Joe Menke and Martijn Roelandse and Burak Ozyurt and Maryann Martone and Anita Bandrowski}, - title = {Rigor and Transparency Index, a new metric of quality for assessing biological and medical science methods}, - year = {2020}, - journal = {bioRxiv}, - volume = {}, - pages = {2020.01.15.908111}, - doi = {10.1101/2020.01.15.908111}, -} - - - - - -@ARTICLE{konkol20, - author = {{Konkol}, Markus and {N{\"u}st}, Daniel and {Goulier}, Laura}, - title = "{Publishing computational research -- A review of infrastructures for reproducible and transparent scholarly communication}", - journal = {arXiv}, - year = 2020, - month = jan, - pages = {2001.00484}, -archivePrefix = {arXiv}, - eprint = {2001.00484}, - primaryClass = {cs.DL}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200100484K}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{infante20, - author = {{Infante-Sainz}, Ra{\'u}l and {Trujillo}, Ignacio and - {Rom{\'a}n}, Javier}, - title = "{The Sloan Digital Sky Survey extended point spread functions}", - journal = {Monthly Notices of the Royal Astronomical Society}, - keywords = {instrumentation: detectors, methods: data analysis, techniques: image processing, techniques: photometric, galaxies: haloes, Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies}, - year = "2020", 
- month = "Feb", - volume = {491}, - number = {4}, - pages = {5317}, - doi = {10.1093/mnras/stz3111}, -archivePrefix = {arXiv}, - eprint = {1911.01430}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.491.5317I}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{gibney20, - author = {Elizabeth Gibney}, - title = {This AI researcher is trying to ward off a reproducibility crisis}, - year = {2020}, - journal = {Nature}, - volume = {577}, - pages = {14}, - doi = {10.1038/d41586-019-03895-5}, -} - - - - - -@ARTICLE{pimentel19, - author = {{Jo\~ao Felipe} Pimentel and Leonardo Murta and Vanessa Braganholo and Juliana Freire}, - title = {A large-scale study about quality and reproducibility of jupyter notebooks}, - year = {2019}, - journal = {Proceedings of the 16th International Conference on Mining Software Repositories}, - volume = {1}, - pages = {507}, - doi = {10.1109/MSR.2019.00077}, -} - - - - - -@ARTICLE{miksa19a, - author = {Tomasz Miksa and Paul Walk and Peter Neish}, - title = {RDA DMP Common Standard for Machine-actionable Data Management Plans}, - year = {2019}, - journal = {RDA}, - pages = {doi:10.15497/rda00039}, - doi = {10.15497/rda00039}, -} - - - - - -@ARTICLE{miksa19b, - author = {Tomasz Miksa and Stephanie Simms and Daniel Mietchen and Sarah Jones}, - title = {Ten principles for machine-actionable data management plans}, - year = {2019}, - journal = {PLoS Computational Biology}, - volume = {15}, - pages = {e1006750}, - doi = {10.1371/journal.pcbi.1006750}, -} - - - - - -@ARTICLE{dicosmo19, - author = {Roberto {Di Cosmo} and Francois Pellegrini}, - title = {Encouraging a wider usage of software derived from research}, - year = {2019}, - journal = {\doihref{https://www.ouvrirlascience.fr/wp-content/uploads/2020/02/Opportunity-Note_software-derived-from-research_EN.pdf}{Ouvrir la science}}, - volume = {}, - pages = {}, - doi = {}, -} - - - - - -@ARTICLE{perignon19, - author = 
{Christophe P\'erignon and Kamel Gadouche and Christophe Hurlin and Roxane Silberman and Eric Debonnel}, - title = {Certify reproducibility with confidential data}, - year = {2019}, - journal = {Science}, - volume = {365}, - pages = {127}, - doi = {10.1126/science.aaw2825}, -} - - - - - -@ARTICLE{munafo19, - author = {Marcus Munaf\'o}, - title = {Raising research quality will require collective action}, - year = {2019}, - journal = {Nature}, - volume = {576}, - pages = {183}, - doi = {10.1038/d41586-019-03750-7}, -} - - - - - -@ARTICLE{jones19, - author = {{Jones}, M.~G. and {Verdes-Montenegro}, L. and {Damas-Segovia}, A. and - {Borthakur}, S. and {Yun}, M. and {del Olmo}, A. and {Perea}, J. and - {Rom{\'a}n}, J. and {Luna}, S. and {Lopez Gutierrez}, D. and - {Williams}, B. and {Vogt}, F.~P.~A. and {Garrido}, J. and - {Sanchez}, S. and {Cannon}, J. and {Ram{\'\i}rez-Moreta}, P.}, - title = "{Evolution of compact groups from intermediate to final stages. A case study of the H I content of HCG 16}", - journal = {Astronomy \& Astrophysics}, - eprint = {1910.03420}, - keywords = {galaxies: groups: individual: HCG 16, galaxies: interactions, galaxies: evolution, galaxies: ISM, radio lines: galaxies}, - year = "2019", - month = "Dec", - volume = {632}, - eid = {A78}, - pages = {A78}, - doi = {10.1051/0004-6361/201936349}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019A&A...632A..78J}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{banek19, - author = {{Banek}, Christine and {Thornton}, Adam and {Economou}, Frossie and - {Fausti}, Angelo and {Krughoff}, K. 
Simon and {Sick}, Jonathan}, - title = "{Why is the LSST Science Platform built on Kubernetes?}", - journal = {Proceedings of ADASS XXIX}, - volume = {arXiv}, - keywords = {Astrophysics - Instrumentation and Methods for Astrophysics}, - year = "2019", - month = "Nov", - eid = {arXiv:1911.06404}, - pages = {1911.06404}, -archivePrefix = {arXiv}, - eprint = {1911.06404}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv191106404B}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{fineberg19, - author = {Harvey V. Fineberg and David B. Allison and Lorena A. Barba and Dianne Chong and David L. Donoho and Juliana Freire and Gerald Gabrielse and Constantine Gatsonis and Edward Hall and Thomas H. Jordan and Dietram A. Scheufele and Victoria Stodden and Simine Vazire, Timothy D. Wilson and Wendy Wood and Jennifer Heimberg and Thomas Arrison and Michael Cohen and Michele Schwalbe and Adrienne Stith Butler and Barbara A. Wanchisen and Tina Winters and Rebecca Morgan and Thelma Cox and Lesley Webb and Garret Tyson and Erin Hammers Forstag}, - title = {Reproducibility and Replicability in Science}, - journal = {The National Academies Press}, - year = 2019, - pages = {1}, - doi = {10.17226/25303}, -} - - - - - -@ARTICLE{akhlaghi19, - author = {{Akhlaghi}, Mohammad}, - title = "{Carving out the low surface brightness universe with NoiseChisel}", - journal = {IAU Symposium 355}, - volume = {arXiv}, - keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies, Computer Science - Computer Vision and Pattern Recognition}, - year = "2019", - month = "Sep", - eid = {arXiv:1909.11230}, - pages = {1909.11230}, -archivePrefix = {arXiv}, - eprint = {1909.11230}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190911230A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{cribbs19, - 
author = {Cribbs, AP and Luna-Valero, S and George, C and Sudbery, IM and Berlanga-Taylor, AJ and Sansom, SN and Smith, T and Ilott, NE and Johnson, J and Scaber, J and Brown, K and Sims, D and Heger, A}, - title = {CGAT-core: a python framework for building scalable, reproducible computational biology workflows [version 2; peer review: 1 approved, 1 approved with reservations]}, - journal = {F1000Research}, - year = 2019, - volume = 8, - pages = {377}, - doi = {10.12688/f1000research.18674.2}, -} - - - - - -@ARTICLE{brinckman19, -author = "Adam Brinckman and Kyle Chard and Niall Gaffney and Mihael Hategan and Matthew B. Jones and Kacper Kowalik and Sivakumar Kulasekaran and Bertram Ludäscher and Bryce D. Mecum and Jarek Nabrzyski and Victoria Stodden and Ian J. Taylor and Matthew J. Turk and Kandace Turner", - title = {Computing environments for reproducibility: Capturing the ``Whole Tale''}, - journal = {Future Generation Computer Systems}, - year = 2019, - volume = 94, - pages = 854, - doi = {10.1016/j.future.2017.12.029}, -} - - - - - -@ARTICLE{uhse19, - author = {Uhse, Simon and Pflug, Florian G. 
and {von Haeseler}, Arndt and Djamei, Armin}, - title = {Insertion Pool Sequencing for Insertional Mutant Analysis in Complex Host‐Microbe Interactions}, - journal = {Current Protocols in Plant Biology}, - volume = {4}, - year = "2019", - month = "July", - pages = {e20097}, - doi = {10.1002/cppb.20097}, -} - - - - - -@ARTICLE{alliez19, - author = {{Alliez}, Pierre and {Di Cosmo}, Roberto and {Guedj}, Benjamin and - {Girault}, Alain and {Hacid}, Mohand-Said and {Legrand}, Arnaud and - {Rougier}, Nicolas P.}, - title = "{Attributing and Referencing (Research) Software: Best Practices and Outlook from Inria}", - journal = {Computing in Science \& Engineering}, - volume = {22}, - keywords = {Computer Science - Digital Libraries, Computer Science - Software Engineering}, - year = "2019", - month = "May", - pages = {39}, -archivePrefix = {arXiv}, - eprint = {1905.11123}, - primaryClass = {cs.DL}, - doi = {10.1109/MCSE.2019.2949413}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190511123A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{kneller19, - author = {Kneller,Gerald R. 
and Hinsen,Konrad}, - title = {Memory effects in a random walk description of protein structure ensembles}, - journal = {The Journal of Chemical Physics}, - volume = {150}, - year = {2019}, - pages = {064911}, - doi = {10.1063/1.5054887}, -} - - - - - -@article{tange18, - author = {Tange, Ole}, - title = {GNU Parallel 2018}, - Journal = {Zenodo}, - volume = {1146014}, - pages = {\href{https://doi.org/10.5281/zenodo.1146014}{DOI:10.5281/zenodo.1146014}}, - year = 2018, - ISBN = {9781387509881}, - doi = {10.5281/zenodo.1146014}, - url = {https://doi.org/10.5281/zenodo.1146014} -} - - - - - -@ARTICLE{rule18, - author = {Adam Rule and Aur\'elien Tabard and {James D.} Hollan}, - title = {Exploration and Explanation in Computational Notebooks}, - journal = {Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems}, - volume = {1}, - year = {2018}, - pages = {30}, - doi = {10.1145/3173574.3173606}, -} - - - - - -@ARTICLE{plesser18, - author = {Hans E. Plesser}, - title = {Reproducibility vs. Replicability: A Brief History of a Confused Terminology}, - journal = {Frontiers in Neuroinformatics}, - volume = {11}, - year = {2018}, - pages = {76}, - doi = {10.3389/fninf.2017.00076}, -} - - - - - -@ARTICLE{zhang18, - author = {{Zhang}, Zhi-Yu and {Romano}, D. and {Ivison}, R.~J. and - {Papadopoulos}, Padelis P. 
and {Matteucci}, F.}, - title = "{Stellar populations dominated by massive stars in dusty starburst galaxies across cosmic time}", - journal = {Nature}, - keywords = {Astrophysics - Astrophysics of Galaxies}, - year = "2018", - month = "Jun", - volume = {558}, - number = {7709}, - pages = {260}, - doi = {10.1038/s41586-018-0196-x}, -archivePrefix = {arXiv}, - eprint = {1806.01280}, - primaryClass = {astro-ph.GA}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2018Natur.558..260Z}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{smart18, - author = {{Smart}, A.G.}, - title = {The war over supercooled water}, - journal = {Physics Today}, - volume = {Aug}, - year = "2018", - pages = {DOI:10.1063/PT.6.1.20180822a}, - doi = {10.1063/PT.6.1.20180822a}, -} - - - - - -@ARTICLE{kaiser18, - author = {{Kaiser}, J.}, - title = {Plan to replicate 50 high-impact cancer papers shrinks to just 18}, - journal = {Science}, - volume = {Jul}, - year = "2018", - pages = {31}, - doi = {10.1126/science.aau9619}, -} - - - - - -@ARTICLE{dicosmo18, - author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, - title = {Identifiers for Digital Objects: The case of software source code preservation}, - journal = {Proceedings of iPRES 2018}, - year = "2018", - pages = {204.4}, - doi = {10.17605/osf.io/kde56}, -} - - - - - -@ARTICLE{gruning18, - author = {Gr\"uning, Bj\"orn and Chilton, John and K\"oster, Johannes and Dale, Ryan and Soranzo, Nicola and {van den Beek}, Marius and Goecks, Jeremy and Backofen, Rolf and Nekrutenko, Anton and Taylor, James}, - title = {Practical Computational Reproducibility in the Life Sciences}, - journal = {Cell Systems}, - volume = 6, - year = "2018", - pages = {631. bioRxiv:\href{https://www.biorxiv.org/content/10.1101/200683v2}{200683}}, - doi = {10.1016/j.cels.2018.03.014}, -} - - - - - -@ARTICLE{allen18, - author = {{Allen}, Alice and {Teuben}, Peter J. and {Ryan}, P. 
Wesley}, - title = "{Schroedinger's Code: A Preliminary Study on Research Source Code Availability and Link Persistence in Astrophysics}", - journal = {The Astrophysical Journal Supplement Series}, - keywords = {methods: numerical, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Digital Libraries}, - year = "2018", - month = "May", - volume = {236}, - number = {1}, - eid = {10}, - pages = {10}, - doi = {10.3847/1538-4365/aab764}, -archivePrefix = {arXiv}, - eprint = {1801.02094}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2018ApJS..236...10A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{burrell18, - author = {{Burrell}, A.G. and {Halford}, A. and {Klenzing}, J. and {Stoneback}, R.A. and {Morley}, S.K. and {Annex}, A.M. and {Laundal}, K.M. and {Kellerman}, A.C. and {Stansby}, D. and {Ma}, J.}, - title = {Snakes on a Spaceship—An Overview of Python in Heliophysics}, - journal = {Journal of Geophysical Research: Space Physics}, - volume = {123}, - year = "2018", - pages = {384}, - doi = {10.1029/2018JA025877}, -} - - - - - -@article{stodden18, - author = {{Stodden}, V. and {Seiler}, J. 
and {Ma}, Z.}, - title = {An empirical analysis of journal policy effectiveness for computational reproducibility}, - volume = {115}, - number = {11}, - pages = {2584}, - year = {2018}, - doi = {10.1073/pnas.1708290115}, - issn = {0027-8424}, - URL = {https://www.pnas.org/content/115/11/2584}, - journal = {Proceedings of the National Academy of Sciences} -} - - - - - -@article {fanelli18, - author = {{Fanelli}, D.}, - title = {Opinion: Is science really facing a reproducibility crisis, and do we need it to?}, - volume = {115}, - number = {11}, - pages = {2628}, - year = {2018}, - doi = {10.1073/pnas.1708272114}, - publisher = {National Academy of Sciences}, - issn = {0027-8424}, - URL = {https://www.pnas.org/content/115/11/2628}, - journal = {Proceedings of the National Academy of Sciences} -} - - - - - - -@ARTICLE{lewis18, - author = {{Lewis}, L.M. and {Edwards}, M.C. and {Meyers}, Z.R. and {Conover Talbot}, C. and {Hao}, H. and {Blum}, D. }, - title = "{Replication Study: Transcriptional amplification in tumor cells with elevated c-Myc}", - journal = {eLife}, - volume = {7}, - year = "2018", - month = "January", - pages = {e30274}, - doi = {10.7554/eLife.30274}, -} - - - - - -@ARTICLE{akhlaghi18b, - author = {{Akhlaghi}, Mohammad and {Bacon}, Roland and {Inami}, Hanae}, - title = "{MUSE HUDF survey I \& II, Sections 7.3 \& 3.4: photometry for objects with no prior broad-band segmentation map}", - journal = {Zenodo}, - pages = {DOI:10.5281/zenodo.1164774}, - year = "2018", - month = "February", - doi = {10.5281/zenodo.1164774}, -} - - - - - -@ARTICLE{akhlaghi18a, - author = {{Akhlaghi}, Mohammad and {Bacon}, Roland}, - title = "{MUSE HUDF survey I, Section 4: data and reproduction pipeline for photometry and astrometry}", - journal = {Zenodo}, - pages = {DOI:10.5281/zenodo.1163746}, - year = "2018", - month = "January", - doi = {10.5281/zenodo.1163746}, -} - - - - - -@ARTICLE{leek17, - author = {Jeffrey T. Leek and Leah R. 
Jager}, - title = {Is Most Published Research Really False?}, - journal = {Annual Review of Statistics and Its Application}, - volume = {4}, - year = {2017}, - pages = {109}, - doi = {10.1146/annurev-statistics-060116-054104}, -} - - - - - -@ARTICLE{reich17, - author = {Michael Reich and Thorin Tabor and Ted Liefeld and Helga Thorvaldsdóttir and Barbara Hill and Pablo Tamayo and Jill P. Mesirov}, - title = {The GenePattern Notebook Environment}, - journal = {Cell Systems}, - year = {2017}, - volume = {5}, - pages = {149}, - doi = {10.1016/j.cels.2017.07.003}, -} - - - - - -@ARTICLE{becker17, - author = {Gabriel Becker and Cory Barr and Robert Gentleman and Michael Lawrence}, - title = {Enhancing Reproducibility and Collaboration via Management of R Package Cohorts}, - journal = {Journal of Statistical Software, Articles}, - volume = {82}, - pages = 1, - year = "2017", -archivePrefix = {arXiv}, - eprint = {1501.02284}, - doi = {10.18637/jss.v082.i01}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150102284B}, -} - - - - - -@ARTICLE{jenness17, - author = {{Jenness}, Tim}, - title = "{Modern Python at the Large Synoptic Survey Telescope}", - journal = {ADASS 27}, - year = "2017", - month = "Dec", - eid = {arXiv:1712.00461}, - pages = {arXiv:1712.00461}, -archivePrefix = {arXiv}, - eprint = {1712.00461}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2017arXiv171200461J}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@article{bezanson17, - title={Julia: A fresh approach to numerical computing}, - author={Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B}, - journal={SIAM {R}eview}, - volume={59}, - number={1}, - pages={65}, - year={2017}, - archivePrefix={arXiv}, - eprint={1411.1607}, - publisher={SIAM}, - doi={10.1137/141000671}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2014arXiv1411.1607B}, -} - - - - - -@ARTICLE{melson17, - author = {{Melsen}, L.A. 
and {Torfs}, P.J.J.F and {Uijlenhoet}, R. and {Teuling}, A.J.}, - title = {Comment on “Most computational hydrology is not reproducible, so is it really science?” by Christopher Hutton et al.}, - journal = {Water Resources Research}, - volume = 53, - pages = {2568}, - year = {2017}, - doi = {10.1002/2016WR020208}, -} - - - - - -@ARTICLE{munafo17, - author = {{Munaf\'o}, M.R. and {Nosek}, B.A. and {Bishop}, D.V.M. and {Button}, K.S. and {Chambers}, C.D. and {Percie du Sert}, N. and {Simonsohn}, U. and {Wagenmakers}, E.J. and {Ware}, J.J. {Ioannidis}, J.P.A.}, - title = {A manifesto for reproducible science}, - journal = {Nature Human Behaviour}, - volume = 1, - pages = {21}, - year = {2017}, - doi = {10.1038/s41562-016-0021}, -} - - - - - -@ARTICLE{jimenez17, - title={The popper convention: Making reproducible systems evaluation practical}, - author = {{Jimenez}, I. and {Sevilla}, M. and {Watkins}, N. and {Maltzahn}, C. and {Lofstead}, J. and {Mohror}, K. and {Arpaci-Dusseau}, A. and {Arpaci-Dusseau}, R.}, - journal = {IEEE IPDPSW}, - pages = {1561}, - year = {2017}, - doi = {10.1109/IPDPSW.2017.157}, -} - - - - - -@ARTICLE{bacon17, - author = {{Bacon}, Roland and {Conseil}, Simon and {Mary}, David and - {Brinchmann}, Jarle and {Shepherd}, Martin and {Akhlaghi}, Mohammad and - {Weilbacher}, Peter M. and {Piqueras}, Laure and {Wisotzki}, Lutz and - {Lagattuta}, David and {Epinat}, Benoit and {Guerou}, Adrien and - {Inami}, Hanae and {Cantalupo}, Sebastiano and - {Courbot}, Jean Baptiste and {Contini}, Thierry and {Richard}, Johan and - {Maseda}, Michael and {Bouwens}, Rychard and {Bouch{\'e}}, Nicolas and - {Kollatschny}, Wolfram and {Schaye}, Joop and {Marino}, Raffaella Anna and - {Pello}, Roser and {Herenz}, Christian and {Guiderdoni}, Bruno and - {Carollo}, Marcella}, - title = "{The MUSE Hubble Ultra Deep Field Survey. I. 
Survey description, data reduction, and source detection}", - journal = {Astronomy \& Astrophysics}, - keywords = {galaxies: distances and redshifts, galaxies: high-redshift, cosmology: observations, methods: data analysis, techniques: imaging spectroscopy, galaxies: formation, Astrophysics - Astrophysics of Galaxies}, - year = "2017", - month = "Nov", - volume = {608}, - eid = {A1}, - pages = {A1}, - doi = {10.1051/0004-6361/201730833}, -archivePrefix = {arXiv}, - eprint = {1710.03002}, - primaryClass = {astro-ph.GA}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2017A\&A...608A...1B}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{austin17, - author = {{Claire C.} Austin and Theodora Bloom and Sünje Dallmeier-Tiessen and {Varsha K.} Khodiyar and Fiona Murphy and Amy Nurnberger and Lisa Raymond and Martina Stockhause and Jonathan Tedds and Mary Vardigan and Angus Whyte}, - title = {Key components of data publishing: using current best practices to develop a reference model for data publishing}, - journal = {International Journal on Digital Libraries}, - volume = {18}, - year = {2017}, - pages = {77}, - doi = {10.1007/s00799-016-0178-2}, -} - - - - - -@ARTICLE{smith16, - author = {Arfon M. Smith and Daniel S. Katz and Kyle E. 
Niemeyer}, - title = {Software citation principles}, - journal = {PeerJ Computer Science}, - volume = {2}, - year = {2016}, - pages = {e86}, - doi = {10.7717/peerj-cs.86}, -} - - - - - -@ARTICLE{ziemann16, - author = {Mark Ziemann and Yotam Eren and Assam El-Osta}, - title = {Gene name errors are widespread in the scientific literature}, - journal = {Genome Biology}, - volume = {17}, - year = {2016}, - pages = {177}, - doi = {10.1186/s13059-016-1044-7}, -} - - - - - -@ARTICLE{hinsen16, - author = {Konrad Hinsen}, - title = {Scientific notations for the digital era}, - journal = {The Self Journal of Science}, - year = {2016}, - pages = {1: arXiv:\href{https://arxiv.org/abs/1605.02960}{1605.02960}}, -} - - - - - -@ARTICLE{kluyver16, - author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing}, - title = "{Jupyter Notebooks – a publishing format for reproducible computational workflows}", - journal = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, - year = {2016}, - pages = {87}, - doi = {10.3233/978-1-61499-649-1-87}, -} - - - - - -@ARTICLE{baker16, - author = {{Baker}, M.}, - title = "{Is there a reproducibility crisis?}", - journal = {Nature}, - volume = {533}, - year = "2016", - month = "May", - pages = {452}, - doi = {10.1038/533452a}, -} - - - - - -@ARTICLE{wilkinson16, - author = { {Wilkinson}, M.D and {Dumontier}, M. and {Aalbersberg}, I.J. and {Appleton}, G. and {Axton}, M. and {Baak}, A. and {Blomberg}, N. and {Boiten}, J. and {da Silva Santos}, L.B and {Bourne}, P.E. and {Bouwman}, J. and {Brookes}, A.J. and {Clark}, T. and {Crosas}, M. and {Dillo}, I. and {Dumon}, O. and {Edmunds}, S. and {Evelo}, C. and {Finkers}, R. and {Gonzalez-Beltran}, A. and {Gray}, A.J.G. and {Groth}, P. and {Goble}, C. 
and {Grethe}, Jeffrey S. and {Heringa}, J. and {’t Hoen}, P.A.C and {Hooft}, R. and {Kuhn}, T. and {Kok}, R. and {Kok}, J. and {Lusher}, S. and {Martone}, M. and {Mons}, A. and {Packer}, A. and {Persson}, B. and {Rocca-Serra}, P. and {Roos}, M. and {van Schaik}, R. and {Sansone}, S. and {Schultes}, E. and {Sengstag}, T. and {Slater}, T. and {Strawn}, G. and {Swertz}, M. and {Thompson}, M. and {van der Lei}, J. and {van Mulligen}, E. and {Velterop}, J. and {Waagmeester}, A. and {Wittenburg}, P. and {Wolstencroft}, K. and {Zhao}, J. and {Mons}, B.}, - title = "{The FAIR Guiding Principles for scientific data management and stewardship}", - journal = {Scientific Data}, - year = 2016, - month = mar, - volume = 3, - pages = {160018}, - doi = {10.1038/sdata.2016.18}, -} - - - - -@ARTICLE{hutton16, - author = {{Hutton}, C. and {Wagener}, T. and {Freer}, J. and {Han}, D. and {Duffy}, C. and {Arheimer}, B.}, - title = {Most computational hydrology is not reproducible, so is it really science?}, - journal = {Water Resources Research}, - year = {2016}, - volume = 52, - pages = {7548}, - doi = {10.1002/2016WR019285}, -} - - - - - -@ARTICLE{topalidou16, - author = {{Topalidou}, M. and {Leblois}, A. and {Boraud}, T. and {Rougier}, N.P.}, - title = {A long journey into reproducible computational neuroscience}, - journal = {Frontiers in Computational Neuroscience}, - year = {2016}, - volume = 9, - pages = {30}, - doi = {10.3389/fncom.2015.00030}, -} - - - - - -@ARTICLE{gil16, - author = {{Gil}, Yolanda and {David}, C.H. and {Demir}, I. and {Essawy}, B.T. and {Fulweiler}, R.W. and {Goodall}, J.L. and {Karlstrom}, L. and {Lee}, H. and {Mills}, H.J. and {Oh}, J. and {Pierce}, S.A. and {Pope}, A. and {Tzeng}, M.W. and {Villamizar}, S.R. 
and {Yu}, X}, - title = {Toward the Geoscience Paper of the Future: Best practices for documenting and sharing research from data to software to provenance}, - journal = {Earth and Space Science}, - year = 2016, - volume = 3, - pages = {388}, - doi = {10.1002/2015EA000136}, -} - - - - - -@ARTICLE{romine15, - author = {Charles H. Romine}, - title = {Secure Hash Standard (SHS)}, - journal = {Federal Information processing standards publication}, - volume = {180}, - pages = {4}, - year = {2015}, - doi = {10.6028/NIST.FIPS.180-4}, -} - - - - - -@ARTICLE{horvath15, - author = {Steve Horvath}, - title = {Erratum to: DNA methylation age of human tissues and cell types}, - journal = {Genome Biology}, - volume = {16}, - pages = {96}, - year = {2015}, - doi = {10.1186/s13059-015-0649-6}, -} - - - - - -@ARTICLE{chang15, - author = {Andrew C. Chang and Phillip Li}, - title = {Is Economics Research Replicable? Sixty Published Papers from Thirteen Journals Say ``Usually Not''}, - journal = {Finance and Economics Discussion Series 2015-083}, - year = {2015}, - pages = {1}, - doi = {10.17016/FEDS.2015.083}, -} - - - - - -@ARTICLE{schaffer15, - author = {Jonathan Schaffer}, - title = {What Not to Multiply Without Necessity}, - journal = {Australasian Journal of Philosophy}, - volume = {93}, - pages = {644}, - year = {2015}, - doi = {10.1080/00048402.2014.992447}, -} - - - - - -@ARTICLE{clarkso15, - author = "Chris Clarkson and Mike Smith and Ben Marwick and Richard Fullagar and Lynley A. Wallis and Patrick Faulkner and Tiina Manne and Elspeth Hayes and Richard G. Roberts and Zenobia Jacobs and Xavier Carah and Kelsey M. Lowe and Jacqueline Matthews and S. 
Anna Florin", - title = {The archaeology, chronology and stratigraphy of Madjedbebe (Malakunanja II): A site in northern Australia with early occupation}, - journal = {Journal of Human Evolution}, - year = 2015, - volume = 83, - pages = 46, - doi = {10.1016/j.jhevol.2015.03.014}, -} - - - - - -@ARTICLE{meng15, - author = {Haiyan Meng and Rupa Kommineni and Quan Pham and Robert Gardner and Tanu Malik and Douglas Thain}, - title = {An invariant framework for conducting reproducible computational science}, - journal = {Journal of Computational Science}, - year = 2015, - volume = 9, - pages = 137, - doi = {10.1016/j.jocs.2015.04.012}, -} - - - - - -@ARTICLE{gamblin15, - author = {Gamblin, Todd and LeGendre, Matthew and Collette, Michael R. and Lee, Gregory L. and Moody, Adam and {de Supinski}, Bronis R. and Futral, Scott}, - title = {The Spack package manager: bringing order to HPC software chaos}, - journal = {IEEE SC15}, - year = 2015, - volume = 1, - pages = {1}, - doi = {10.1145/2807591.2807623}, -} - - - - -@ARTICLE{akhlaghi15, - author = {{Akhlaghi}, M. 
and {Ichikawa}, T.}, - title = "{Noise-based Detection and Segmentation of Nebulous Objects}", - journal = {The Astrophysical Journal Supplement Series}, - archivePrefix = "arXiv", - eprint = {1505.01664}, - primaryClass = "astro-ph.IM", - keywords = {galaxies: irregular, galaxies: photometry, galaxies: structure, methods: data analysis, techniques: image processing, techniques: photometric}, - year = 2015, - month = sep, - volume = 220, - eid = {1}, - pages = {1}, - doi = {10.1088/0067-0049/220/1/1}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015ApJS..220....1A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{courtes15, - author = {{Court{\'e}s}, Ludovic and {Wurmus}, Ricardo}, - title = {Reproducible and User-Controlled Software Environments in HPC with Guix}, - journal = {Euro-Par}, - volume = {9523}, - keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Operating Systems, Computer Science - Software Engineering}, - year = {2015}, - month = {Jun}, - eid = {arXiv:1506.02822}, - pages = {arXiv:1506.02822}, -archivePrefix = {arXiv}, - eprint = {1506.02822}, - primaryClass = {cs.DC}, - doi = {10.1007/978-3-319-27308-2_47}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150602822C}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{hinsen15, - author = {{Hinsen}, K.}, - title = {ActivePapers: a platform for publishing and archiving computer-aided research}, - journal = {F1000Research}, - year = 2015, - volume = 3, - pages = {289}, - doi = {10.12688/f1000research.5773.3}, -} - - - - - -@ARTICLE{belhajjame15, - author = {{Belhajjame}, K. and {Zhao}, Z. and {Garijo}, D. and {Gamble}, M. and {Hettne}, K. and {Palma}, R. and {Mina}, E. and {Corcho}, O. and {Gómez-Pérez}, J.M. and {Bechhofer}, S. and {Klyne}, G. 
and {Goble}, C}, - title = "{Using a suite of ontologies for preserving workflow-centric research objects}", - journal = {Journal of Web Semantics}, - year = 2015, - volume = 32, - pages = {16}, - doi = {10.1016/j.websem.2015.01.003}, -} - - - - - -@ARTICLE{bechhofer13, - author = {{Bechhofer}, S. and {Buchan}, I. and {De Roure}, D. and {Missier}, P. and {Ainsworth}, J. and {Bhagat}, J. and Couch, P. and Cruickshank, D. and {Delderfield}, M and Dunlop, I. and {Gamble}, M. and {Michaelides}, D. and {Owen}, S. and {Newman}, D. and {Sufi}, S. and {Goble}, C}, - title = "{Why linked data is not enough for scientists}", - journal = {Future Generation Computer Systems}, - year = 2013, - volume = 29, - pages = {599}, - doi = {10.1016/j.future.2011.08.004}, -} - - - - - -@ARTICLE{peng15, - author = {{Peng}, R.D.}, - title = {The reproducibility crisis in science: A statistical counterattack}, - journal = {Significance}, - year = 2015, - month = jun, - volume = 12, - pages = {30}, - doi = {10.1111/j.1740-9713.2015.00827.x}, -} - - - - - -@ARTICLE{katz14, - author = {Daniel S. Katz}, - title = {Transitive Credit as a Means to Address Social and Technological Concerns Stemming from Citation and Attribution of Digital Products}, - journal = {Journal of Open Research Software}, - year = {2014}, - volume = {2}, - pages = {e20}, - doi = {10.5334/jors.be}, -} - - - - - -@ARTICLE{herndon14, - author = {Thomas Herndon and Michael Ash and Robert Pollin}, - title = {Does high public debt consistently stifle economic growth? 
A critique of Reinhart and Rogoff}, - journal = {Cambridge Journal of Economics}, - year = {2014}, - month = {dec}, - volume = {38}, - pages = {257}, - doi = {10.1093/cje/bet075}, -} - - - - - -@ARTICLE{easterbrook14, - author = {{Easterbook}, S.}, - title = {Open code for open science?}, - journal = {Nature Geoscience}, - year = 2014, - month = oct, - volume = 7, - pages = {779}, - doi = {10.1038/ngeo2283}, -} - - - - - -@ARTICLE{fomel13, - author = {Sergey Fomel and Paul Sava and Ioan Vlad and Yang Liu and Vladimir Bashkardin}, - title = {Madagascar: open-source software project for multidimensional data analysis and reproducible computational experiments}, - journal = {Journal of open research software}, - year = {2013}, - volume = {1}, - pages = {e8}, - doi = {10.5334/jors.ag}, -} - - - - - -@ARTICLE{sandve13, - author = {{Sandve}, G.K. and {Nekrutenko}, A. and {Taylor}, J. and {Hovig}, E.}, - title = {Ten Simple Rules for Reproducible Computational Research}, - journal = {PLoS Computational Biology}, - year = 2013, - month = oct, - volume = 9, - pages = {e1003285}, - doi = {10.1371/journal.pcbi.1003285}, -} - - - - - -@ARTICLE{malik13, - author = {Tanu Malik and Quan Pham and Ian Foster}, - title = {SOLE: Towards Descriptive and Interactive Publications}, - journal = {Implementing Reproducible Research}, - year = 2013, - volume = {Chapter 2}, - pages = {1. URL: \url{https://osf.io/ns2m3}}, -} - - - - - -@ARTICLE{gronenschild12, - author = {Ed H. B. M. Gronenschild and Petra Habets and Heidi I. L. 
Jacobs and Ron Mengelers and Nico Rozendaal and Jim van Os and Machteld Marcelis}, - title = {The Effects of FreeSurfer Version, Workstation Type, and Macintosh Operating System Version on Anatomical Volume and Cortical Thickness Measurements}, - journal = {PLoS ONE}, - volume = {7}, - year = {2012}, - pages = {e38234}, - doi = {10.1371/journal.pone.0038234}, -} - - - - - -@ARTICLE{pham12, - author = {Quan Pham and Tanu Malik and Ian Foster and Roberto {Di Lauro} and Raffaele Montella}, - title = {SOLE: Linking Research Papers with Science Objects}, - journal = {Provenance and Annotation of Data and Processes (IPAW)}, - year = {2012}, - pages = {203}, - doi = {10.1007/978-3-642-34222-6_16}, -} - - - - - -@ARTICLE{davison12, - author = {Andrew Davison}, - title = {Automated Capture of Experiment Context for Easier Reproducibility in Computational Research}, - journal = {Computing in Science \& Engineering}, - volume = {14}, - year = {2012}, - pages = {48}, - doi = {10.1109/MCSE.2012.41}, -} - - - - - -@ARTICLE{zhao12, - author = {Jun Zhao and Jose Manuel Gomez-Perez and Khalid Belhajjame and Graham Klyne and Esteban Garcia-Cuesta and Aleix Garrido and Kristina Hettne and Marco Roos and David {De Roure} and Carole Goble}, - title = {Why workflows break — Understanding and combating decay in Taverna workflows}, - journal = {IEEE 8th International Conference on E-Science}, - year = {2012}, - pages = {1}, - doi = {10.1109/eScience.2012.6404482}, -} - - - - -@ARTICLE{vangorp11, - author = {Pieter {Van Gorp} and Steffen Mazanek}, - title = {SHARE: a web portal for creating and sharing executable research}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {589}, - doi = {10.1016/j.procs.2011.04.062}, -} - - - - - -@ARTICLE{hinsen11, - author = {{Hinsen}, Konrad}, - title = {A data and code model for reproducible research and executable papers}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {579}, - doi = 
{10.1016/j.procs.2011.04.061}, -} - - - - - -@ARTICLE{limare11, - author = {Nicolas Limare and Jean-Michel Morel}, - title = {The IPOL Initiative: Publishing and Testing Algorithms on Line for -Reproducible Research in Image Processing}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {716}, - doi = {10.1016/j.procs.2011.04.075}, -} - - - - - -@ARTICLE{gavish11, - author = {Matan Gavish and David L. Donoho}, - title = {A Universal Identifier for Computational Results}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {637}, - doi = {10.1016/j.procs.2011.04.067}, -} - - - - -@ARTICLE{gabriel11, - author = {Ann Gabriel and Rebecca Capone}, - title = {Executable Paper Grand Challenge Workshop}, - journal = {Procedia Computer Science}, - volume = {4}, - year = {2011}, - pages = {577}, - doi = {10.1016/j.procs.2011.04.060}, -} - - - - - -@ARTICLE{nowakowski11, - author = {Piotr Nowakowski and Eryk Ciepiela and Daniel Har\k{e}\.{z}lak and Joanna Kocot and Marek Kasztelnik and Tomasz Barty\'nski and Jan Meizner and Grzegorz Dyk and Maciej Malawski}, - title = {The Collage Authoring Environment}, - journal = {Procedia Computer Science}, - volume = {4}, - year = {2011}, - pages = {608}, - doi = {j.procs.2011.04.064}, -} - - - - - -@ARTICLE{peng11, - author = {{Peng}, R.D.}, - title = {Reproducible Research in Computational Science}, - journal = {Science}, - year = {2011}, - month = dec, - volume = 334, - pages = {1226}, - doi = {10.1126/science.1213847}, -} - - - - - -@ARTICLE{gil10, - author = {Yolanda Gil and Pedro A. 
González-Calero and Jihie Kim and Joshua Moody and Varun Ratnakar}, - title = {A semantic framework for automatic generation of computational workflows using distributed data and component catalogues}, - journal = {Journal of Experimental \& Theoretical Artificial Intelligence}, - year = {2010}, - volume = {23}, - pages = {389}, - doi = {10.1080/0952813X.2010.490962}, -} - - - - - -@ARTICLE{pence10, - author = {{Pence}, W.~D. and {Chiappetti}, L. and {Page}, C.~G. and {Shaw}, R.~A. and - {Stobie}, E.}, - title = "{Definition of the Flexible Image Transport System (FITS), version 3.0}", - journal = {Astronomy and Astrophysics}, - keywords = {instrumentation: miscellaneous, methods: miscellaneous, techniques: miscellaneous, reference systems, standards, astronomical databases: miscellaneous}, - year = "2010", - month = "Dec", - volume = {524}, - eid = {A42}, - pages = {A42}, - doi = {10.1051/0004-6361/201015362}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2010A\&A...524A..42P}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{goecks10, - author = {Jeremy Goecks and Anton Nekrutenko and James Taylor}, - title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences}, - journal = {Genome Biology}, - year = {2010}, - volume = {11}, - pages = {R86}, - doi = {10.1186/gb-2010-11-8-r86}, -} - - - - - -@ARTICLE{merali10, - author = {Zeeya Merali}, - title = {Computational science: ...Error}, - journal = {Nature}, - year = 2010, - volume = 467, - pages = {775}, - doi = {10.1038/467775a}, -} - - - - - -@ARTICLE{casadevall10, - author = {{Casadevall}, A. 
and {Fang}, F.C}, - title = {Reproducible Science}, - journal = {Infection and Immunity}, - year = 2010, - volume = 78, - pages = {4972}, - doi = {10.1128/IAI.00908-10}, -} - - - - - -@ARTICLE{mesirov10, - author = {{Mesirov}, J.P.}, - title = {Accessible Reproducible Research}, - journal = {Science}, - year = 2010, - volume = 327, - pages = {415}, - doi = {10.1126/science.1179653}, -} - - - - - -@ARTICLE{cheney09, - author = {James Cheney and Laura Chiticariu and Wang-Chiew Tan}, - title = {Provenance in Databases: Why, How, and Where}, - journal = {Foundations and Trends in Databases}, - year = {2009}, - volume = {1}, - pages = {379}, - doi = {10.1561/1900000006}, -} - - - - - -@ARTICLE{ioannidis2009, - author = {John P. A. Ioannidis and David B. Allison and Catherine A. Ball and Issa Coulibaly and Xiangqin Cui and Aedín C Culhane and Mario Falchi and Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion and Tapan Mehta and Michael Nitzberg and Grier P. Page and Enrico Petretto and Vera {van Noort}}, - title = {Repeatability of published microarray gene expression analyses}, - journal = {Nature Genetics}, - year = {2009}, - volume = {41}, - pages = {149}, - doi = {10.1038/ng.295}, -} - - - - - -@ARTICLE{fomel09, - author = {Sergey Fomel and Jon F. Claerbout}, - title = {Reproducible Research}, - journal = {Computing in Science Engineering}, - year = {2009}, - volume = {11}, - pages = {5}, - doi = {10.1109/MCSE.2009.14}, -} - - - - - -@ARTICLE{baggerly09, - author = {Keith A. 
Baggerly and Kevin R Coombes}, - title = {Deriving chemosensitivity from cell lines: Forensic bioinformatics and reproducible research in high-throughput biology}, - journal = {The Annals of Applied Statistics}, - year = {2009}, - volume = {3}, - pages = {1309}, - doi = {10.1214/09-AOAS291}, -} - - - - - -@ARTICLE{scheidegger08, - author = {Carlos Scheidegger and David Koop and Emanuele Santos and Huy Vo and Steven Callahan and Juliana Freire and Cláudio Silva}, - title = {Tackling the Provenance Challenge one layer at a time}, - journal = {Concurrency Computation: Practice and Experiment}, - year = {2008}, - volume = {20}, - pages = {473}, - doi = {10.1002/cpe.1237}, -} - - - - - -@ARTICLE{moreau08, - author = {Moreau, Luc and Ludäscher, Bertram and Altintas, Ilkay and Barga, Roger S. and Bowers, Shawn and Callahan, Steven and Chin JR., George and Clifford, Ben and Cohen, Shirley and Cohen-Boulakia, Sarah and Davidson, Susan and Deelman, Ewa and Digiampietri, Luciano and Foster, Ian and Freire, Juliana and Frew, James and Futrelle, Joe and Gibson, Tara and Gil, Yolanda and Goble, Carole and Golbeck, Jennifer and Groth, Paul and Holland, David A. and Jiang, Sheng and Kim, Jihie and Koop, David and Krenek, Ales and McPhillips, Timothy and Mehta, Gaurang and Miles, Simon and Metzger, Dominic and Munroe, Steve and Myers, Jim and Plale, Beth and Podhorszki, Norbert and Ratnakar, Varun and Santos, Emanuele and Scheidegger, Carlos and Schuchardt, Karen and Seltzer, Margo and Simmhan, Yogesh L. and Silva, Claudio and Slaughter, Peter and Stephan, Eric and Stevens, Robert and Turi, Daniele and Vo, Huy and Wilde, Mike and Zhao, Jun and Zhao, Yong}, - title = {The First Provenance Challenge}, - journal = {Concurrency Computation: Practice and Experiment}, - year = {2008}, - volume = {20}, - pages = {473}, - doi = {10.1002/cpe.1233}, -} - - - - - -@Article{matplotlib2007, - Author = {Hunter, J. 
D.}, - Title = {Matplotlib: A 2D graphics environment}, - Journal = {CiSE}, - Volume = {9}, - Number = {3}, - Pages = {90}, - abstract = {Matplotlib is a 2D graphics package used for Python - for application development, interactive scripting, and - publication-quality image generation across user - interfaces and operating systems.}, - publisher = {IEEE COMPUTER SOC}, - doi = {10.1109/MCSE.2007.55}, - year = 2007 -} - - - - - -@ARTICLE{witten2007, - author = {Ben Witten and Bill Curry and Jeff Shragge}, - title = {A New Build Environment for SEP}, - journal = {Stanford Exploration Project}, - year = {2007}, - volume = {129}, - pages = {247: \url{http://sepwww.stanford.edu/data/media/public/docs/sep129/ben1.pdf}}, -} - - - - - -@ARTICLE{miller06, - author = {Greg Miller}, - title = {A Scientist's Nightmare: Software Problem Leads to Five Retractions}, - journal = {Science}, - year = {2006}, - volume = {314}, - pages = {1856}, - doi = {10.1126/science.314.5807.1856}, -} - - - - - -@ARTICLE{reich06, - author = {Michael Reich and Ted Liefeld and Joshua Gould and Jim Lerner and Pablo Tamayo and Jill P Mesirov}, - title = {GenePattern 2.0}, - journal = {Nature Genetics}, - year = {2006}, - volume = {38}, - pages = {500}, - doi = {10.1038/ng0506-500}, -} - - - - - -@ARTICLE{ludascher05, - author = {Ludäs\-cher, Bertram and Altintas, Ilkay and Berkley, Chad and Higgins, Dan and Jaeger, Efrat and Jones, Matthew and Lee, Edward A. and Tao, Jing and Zhao, Yang}, - title = {Scientific workflow management and the Kepler system}, - journal = {Concurrency Computation: Practice and Experiment}, - year = {2006}, - volume = {18}, - pages = {1039}, - doi = {10.1002/cpe.994}, -} - - - - - -@ARTICLE{ioannidis05, - author = {John P. A. 
Ioannidis}, - title = {Why Most Published Research Findings Are False}, - journal = {PLoS Medicine }, - year = {2005}, - volume = {2}, - pages = {e124}, - doi = {10.1371/journal.pmed.0020124}, -} - - - - - -@ARTICLE{bavoil05, - author = {Louis Bavoil and Steven P. Callahan and Patricia J. Crossno and Juliana Freire and Carlos E. Scheidegger and Cláudio T. Silva and Huy T. Vo}, - title = {VisTrails: Enabling Interactive Multiple-View Visualizations}, - journal = {VIS 05. IEEE Visualization}, - year = {2005}, - volume = {}, - pages = {135}, - doi = {10.1109/VISUAL.2005.1532788}, -} - - - - - -@ARTICLE{dolstra04, - author = {{Dolstra}, Eelco and {de Jonge}, Merijn and {Visser}, Eelco}, - title = {Nix: A Safe and Policy-Free System for Software Deployment}, - journal = {Large Installation System Administration Conference}, - year = {2004}, - volume = {18}, - pages = {79. \url{https://www.usenix.org/legacy/events/lisa04/tech/full_papers/dolstra/dolstra.pdf}}, -} - - - - - -@ARTICLE{oinn04, - author = {Oinn, Tom and Addis, Matthew and Ferris, Justin and Marvin, Darren and Senger, Martin and Greenwood, Mark and Carver, Tim and Glover, Kevin and Pocock, Matthew R. and Wipat, Anil and Li, Peter}, - title = {Taverna: a tool for the composition and enactment of bioinformatics workflows}, - journal = {Bioinformatics}, - year = {2004}, - volume = {20}, - pages = {3045}, - doi = {10.1093/bioinformatics/bth361}, -} - - - - - -@ARTICLE{schwab2000, - author = {Matthias Schwab and Martin Karrenbach and Jon F. Claerbout}, - title = {Making scientific computations reproducible}, - journal = {Computing in Science \& Engineering}, - year = {2000}, - volume = {2}, - pages = {61}, - doi = {10.1109/5992.881708}, -} - - - - - -@ARTICLE{buckheit1995, - author = {Jonathan B. Buckheit and David L. 
Donoho}, - title = {WaveLab and Reproducible Research}, - journal = {Wavelets and Statistics}, - year = {1995}, - volume = {1}, - pages = {55}, - doi = {10.1007/978-1-4612-2544-7\_5}, -} - - - - - -@ARTICLE{claerbout1992, - author = {Jon F. Claerbout and Martin Karrenbach}, - title = {Electronic documents give reproducible research a new meaning}, - journal = {SEG Technical Program Expanded Abstracts}, - year = {1992}, - volume = {1}, - pages = {601}, - doi = {10.1190/1.1822162}, -} - - - - - -@ARTICLE{eker03, - author = {Johan Eker and Jorn W Janneck and Edward A. Lee and Jie Liu and Xiaojun Liu and Jozsef Ludvig and Sonia Sachs and Yuhong Xiong and Stephen Neuendorffer}, - title = {Taming heterogeneity - the Ptolemy approach}, - journal = {Proceedings of the IEEE}, - year = {2003}, - volume = {91}, - pages = {127}, - doi = {10.1109/JPROC.2002.805829}, -} - - - - - -@ARTICLE{stevens03, - author = {Robert Stevens and Kevin Glover and Chris Greenhalgh and Claire Jennings and Simon Pearce and Peter Li and Melena Radenkovic and Anil Wipat}, - title = {Performing in silico Experiments on the Grid: A Users Perspective}, - journal = {Proceedings of UK e-Science All Hands Meeting}, - year = {2003}, - pages = {43}, -} - - - - - -@ARTICLE{knuth84, - author = {Donald Knuth}, - title = {Literate Programming}, - journal = {The Computer Journal}, - year = {1984}, - volume = {27}, - pages = {97}, - doi = {10.1093/comjnl/27.2.97}, -} - - - - - -@ARTICLE{stallman88, - author = {Richard M. Stallman and Roland McGrath and Paul D. Smith}, - title = {GNU Make: a program for directing recompilation}, - journal = {Free Software Foundation}, - year = {1988}, - pages = {ISBN:1-882114-83-3. 
\url{https://www.gnu.org/s/make/manual/make.pdf}}, -} - - - - - -@ARTICLE{somogyi87, - author = {Zoltan Somogyi}, - title = {Cake: a fifth generation version of make}, - journal = {University of Melbourne}, - year = {1987}, - pages = {1: \url{https://pdfs.semanticscholar.org/3e97/3b5c9af7763d70cdfaabdd1b96b3b75b5483.pdf}}, -} - - - - - -@ARTICLE{feldman79, - author = {Stuart I. Feldman}, - title = {Make -- a program for maintaining computer programs}, - journal = {Journal of Software: Practice and Experience}, - volume = {9}, - pages = {255}, - year = {1979}, - doi = {10.1002/spe.4380090402}, -} - - - - - -@ARTICLE{mcilroy78, - author = {M. D. McIlroy and E. N. Pinson and B. A. Tague}, - title = {UNIX Time-Sharing System: Forward}, - journal = {\doihref{https://archive.org/details/bstj57-6-1899/mode/2up}{Bell System Technical Journal}}, - year = {1978}, - volume = {57}, - pages = {6, ark:/13960/t0gt6xf72}, - doi = {}, -} - - - - - -@ARTICLE{anscombe73, - author = {{Anscombe}, F.J.}, - title = {Graphs in Statistical Analysis}, - journal = {The American Statistician}, - year = {1973}, - volume = {27}, - pages = {17}, - doi = {10.1080/00031305.1973.10478966}, -} - - - - - -@ARTICLE{roberts69, - author = {{Roberts}, K.V.}, - title = {The publication of scientific fortran programs}, - journal = {Computer Physics Communications}, - year = {1969}, - volume = {1}, - pages = {1}, - doi = {10.1016/0010-4655(69)90011-3}, -} -- cgit v1.2.1