From 7b008dfbb9b2f6a1f5145e3841464e723f590feb Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Fri, 22 May 2020 02:15:06 +0100 Subject: Re-write of the paper to fit in ~6000 words and IEEE format Following the fact that the DSJ editor decided that this paper doesn't fit into their scope, we decided to submit it to IEEE's Computing in Science and Engineering (CiSE). So with this commit the text was re-written to fit into their style and word-count limitations. --- paper.tex | 456 ++++-- project | 5 + reproduce/analysis/config/demo-year.conf | 3 + reproduce/analysis/config/menke-demo-year.conf | 3 - reproduce/analysis/make/demo-plot.mk | 4 +- reproduce/analysis/make/format.mk | 2 +- reproduce/analysis/make/paper.mk | 11 +- reproduce/software/config/texlive-packages.conf | 4 +- tex/src/figure-branching.tex | 6 +- tex/src/figure-data-lineage.tex | 18 +- tex/src/figure-tools-per-year.tex | 2 +- tex/src/preamble-project.tex | 37 +- tex/src/references.bib | 1772 ---------------------- tex/src/references.tex | 1786 +++++++++++++++++++++++ 14 files changed, 2189 insertions(+), 1920 deletions(-) create mode 100644 reproduce/analysis/config/demo-year.conf delete mode 100644 reproduce/analysis/config/menke-demo-year.conf delete mode 100644 tex/src/references.bib create mode 100644 tex/src/references.tex diff --git a/paper.tex b/paper.tex index 8d7bde9..8ef2095 100644 --- a/paper.tex +++ b/paper.tex @@ -60,20 +60,16 @@ % in the abstract or keywords. \begin{abstract} %% CONTEXT - Many reproducible workflow solutions have been proposed over recent decades. - Most use the high-level technologies that were popular when they were created, providing an immediate solution that is unlikely to be sustainable in the long term. - Decades later, scientists lack the resources to rewrite their projects, while still being accountable for their results. 
- This creates generational gaps, which, together with technological obsolescence, impede reproducibility and building upon previous work. + Reproducible workflow solutions commonly use the high-level technologies that were popular when they were created, providing an immediate solution that is unlikely to be sustainable in the long term. %% AIM We aim to introduce a set of criteria to address this problem and to demonstrate their practicality. %% METHOD The criteria have been tested in several research publications and can be summarized as: completeness (no dependency beyond a POSIX-compatible operating system, no administrator privileges, no network connection and storage primarily in plain-text); modular design; linking analysis with narrative; temporal provenance; scalability; and free-and-open-source software. %% RESULTS - Through an implementation, called ``Maneage'' (managing+lineage), we find that storing the project in machine-actionable and human-readable plain-text, enables version-control, cheap archiving, automatic parsing to extract data provenance, and peer-reviewable verification. - Furthermore, we show that these criteria are not limited to long-term reproducibility but also provide immediate, fast short-term reproducibility. + Through an implementation, called ``Maneage'' (managing+lineage), we find that storing the project in machine-actionable and human-readable plain-text, enables version-control, cheap archiving, automatic parsing to extract data provenance, and peer-review-able verification. + Furthermore, we find that these criteria are not limited to long-term reproducibility but also provide immediate, fast short-term reproducibility benefits. %% CONCLUSION - We conclude that requiring longevity from solutions is realistic. - We discuss the benefits of these criteria for scientific progress. + We conclude that requiring longevity from solutions is realistic and discuss the benefits of these criteria for scientific progress. 
\end{abstract} % Note that keywords are not normally used for peerreview papers. @@ -101,130 +97,348 @@ Data Lineage, Provenance, Reproducibility, Scientific Pipelines, Workflows \section{Introduction} % The very first letter is a 2 line initial drop letter followed % by the rest of the first word in caps. -\IEEEPARstart{T}{his} demo file is intended to serve as a ``starter file'' -for IEEE journal papers produced under \LaTeX\ using -IEEEtran.cls version 1.8b and later. -% You must have at least 2 lines in the paragraph with the drop letter -% (should never be an issue) -Here is an example citation \cite{akhlaghi19}. - - - - -\section{Principles} -\label{sec:principles} - -The core principle of Maneage is simple: science is defined primarily by its method, not its result. -As \cite{buckheit1995} describe it, modern scientific papers are merely advertisements of scholarship, while the actual scholarship is the coding behind the plots/results. -Many solutions have been proposed in the last decades, including (but not limited to) -1992: \href{https://sep.stanford.edu/doku.php?id=sep:research:reproducible}{RED}, -2003: \href{https://taverna.incubator.apache.org}{Apache Taverna}, -2004: \href{https://www.genepattern.org}{GenePattern}, -2010: \href{https://wings-workflows.org}{WINGS}, -2011: \href{https://www.ipol.im}{Image Processing On Line journal} (IPOL), - \href{https://www.activepapers.org}{Active papers}, - \href{https://is.ieis.tue.nl/staff/pvgorp/share}{SHARE}, -2015: \href{https://sciunit.run}{Sciunit}; -2017: \href{https://falsifiable.us}{Popper}; -2019: \href{https://wholetale.org}{WholeTale}. -To help in the comparison, the founding principles of Maneage are listed below. - - -\begin{enumerate}%[label={\bf P\arabic*] -\item \label{principle:complete}\textbf{Completeness:} - A project that is complete, or self-contained, - (P1.1) has no dependency beyond the Port\-able Operating System (OS) Interface, or POSIX, or a minimal Unix-like environment. 
- A consequence of this is that the project itself must be stored in plain-text: not needing any specialized software to open, parse or execute. - (P1.2) does not affect the host, - (P1.3) does not require root, or administrator, privileges, - (P1.4) builds its software for an independent environment, - (P1.5) can be run locally (without internet connection), - (P1.6) contains the full project's analysis, visualization \emph{and} narrative, from access to raw inputs to producing final published format (e.g., PDF or HTML), - (P1.7) requires no manual/human interaction and can run automatically \cite[according to][``\emph{a clerk can do it}'']{claerbout1992}. - - \emph{Comparison with existing:} with many dependencies beyond POSIX, except for IPOL, none of the tools above are complete. - For example, the workflow of most recent solutions need Python or Jupyter notebooks. - Because of their complexity (see \ref{principle:complexity}), pre-built binary blobs like containers or virtual machines are the chosen storage format, which are large (Giga-bytes) and expensive to archive. - Furthermore, third-party package managers setup the environment, like Conda, or the OS's, like apt or yum. - However, exact versions of \emph{every software} are rarely included, and the servers remove old binaries, hence blobs are hard to recreate. - Blobs also have a short lifespan, e.g., Docker containers made today, may not be operable with future versions of Docker or Linux (currently Linux 3.2.x is the earliest supported version, released in 2012). - In general they mostly aim for short-term reproducibility. - A plain-text project is readable by humans and machines (even if it can't be executed) and consumes no less than a megabyte. - -\item \label{principle:modularity}\textbf{Modularity:} -A project should be compartmentalized into independent modules with well-defined inputs/outputs having no side effects. 
-Communication between the independent modules should be explicit, providing several optimizations: -(1) independent modules can run in parallel. -Modules that do not need to be run (because their dependencies have not changed) will not be re-run. -(2) Data provenance extraction (recording any dataset's origins). -(3) Citation: others can credit specific parts of a project. -(4) Usage in other projects. -(5) Most importantly: they are easy to debug and improve. - -\emph{Comparison with existing:} Visual workflow tools like Apache Taverna, GenePattern, Kepler or VisTrails encourage this, but the more recent tools (mostly written in Python) leave this to project authors. -However, designing a modular project needs to be encouraged and facilitated. -Otherwise, scientists, who are not usually trained in data management, will rarely design a modular project, leading to great inefficiencies in terms of project cost and/or scientific accuracy (testing/validating will be expensive). - -\item \label{principle:complexity}\textbf{Minimal complexity:} - This is Ockham's razor extrapolated to project management \cite[``\emph{Never posit pluralities without necessity}''][]{schaffer15}: - 1) avoid complex relations between analysis steps (related to \ref{principle:modularity}). - 2) avoid the programming language that is currently in vogue, because it is going to fall out of fashion soon and require significant resources to translate or rewrite it every few years (to stay fashionable). - The same job can be done with more stable/basic tools, requiring less long-term effort. - - \emph{Comparison with existing:} IPOL stands out here too (requiring only ISO C), however most others are written in Python, and use Conda or Jupyter (see \ref{principle:complete}). - Besides being incomplete (\ref{principle:complete}), these tools have short lifespans and evolve fast (e.g., Python 2 code cannot run with Python 3, causing disruption in many projects). 
- Their complex dependency trees also making them hard to maintain, for example, see the dependency tree of Matlplotlib in \cite[][Figure 1]{alliez19}, its one of the simpler Jupyter dependencies. - The longevity of a workflow is determined by its shortest-lived dependency. - -\item \label{principle:verify}\textbf{Verifiable inputs and outputs:} -The project should automatically verify its inputs (software source code and data) \emph{and} outputs, not needing expert knowledge to confirm a reproduction. - -\emph{Comparison with existing:} Such verification is usually possible in most systems, but as a responsibility of the project authors. -As with \ref{principle:modularity}, due to lack of training, if not actively encouraged and facilitated, it will not be implemented. - -\item \label{principle:history}\textbf{History and temporal provenance:} +%\IEEEPARstart{F}{irst} word + +Reproducible research has been discussed in the sciences for about 30 years \cite{claerbout1992, fineberg19}. +Many solutions have been proposed, mostly relying on the common technology of the day: starting with Make and Matlab libraries in the 1990s, to Java in the 2000s and in the last decade they are mostly based on Python. +Recently controlling the environment has been facilitated through generic package managers (PMs) and containers. + +However, because of their high-level nature, such third party tools for the workflow (not the analysis) grow very fast, e.g., Python 2 code cannot run with Python 3, interrupting many projects. +Furthermore, containers (in custom binary formats) are also being heavily used recently, but are large (Gigabytes) and expensive to archive. +Also, once the binary format is obsolete, reading or parsing the project is not possible. + +The cost of staying up to date with this evolving landscape is high. 
+Scientific projects in particular suffer the most: scientists have to focus on their own research domain, but they also need to understand the used technology to a certain level, because it determines their results and interpretations. +Decades later, they are also still held accountable for their results. +Hence the evolving technology creates generational gaps in the scientific community, not allowing the previous generations to share valuable lessons which are too low-level to be published in a traditional scientific paper. +As a solution to this problem, here we introduce a set of criteria that can guarantee the longevity of a project based on our experiences with existing solutions. + + + + + +\section{Commonly used tools and their longevity} +To highlight the proposed criteria, some of the most commonly used tools are reviewed from the long-term usability perspective. +We recall that while longevity is important in some fields (like the sciences), it isn't necessarily of interest in others (e.g., short-term commercial projects), hence the wide usage of tools that evolve very fast. +Most existing reproducible workflows use a common set of third-party tools that can be categorized as: +(1) Environment isolators like virtual machines, containers, etc., +(2) PMs like Conda, Nix, or Spack, +(3) Job orchestrators like scripts, Make, SCons, and CGAT-core, +(4) Notebooks like Jupyter. + +To isolate the environment, virtual machines (VMs) have sometimes been used, e.g., in \href{https://is.ieis.tue.nl/staff/pvgorp/share}{SHARE} (which was awarded 2nd prize in the Elsevier Executable Paper Grand Challenge of 2011 and discontinued in 2019). +However, containers (in particular Docker and, to a lesser extent, Singularity) are by far the most used solution today, so we'll focus on Docker here. + +%% Note that L. Barba (second author of this paper) is the editor in chief of CiSE. +Ideally, it is possible to precisely version/tag the images that are imported into a Docker container. 
+But that is rarely practiced in most solutions that we have studied. +Usually images are imported with generic operating system names e.g., `\inlinecode{FROM ubuntu:16.04}'\cite{mesnard20}. +The extracted tarball (from \url{https://partner-images.canonical.com/core/xenial}) is updated with different software versions almost monthly and only archives the last 5 images. +Hence if the Dockerfile is run in different months, it will contain different core operating system components. +Furthermore, in the year 2024, when the long-term support for this version of Ubuntu expires, it will be totally removed. +This is similar in other OSs: pre-built binary files are large and expensive to maintain and archive. +Furthermore Docker requires root permissions, and only supports recent (in ``long-term-support'') versions of the host kernel, hence older Docker images may not be executable. + +Once the host OS is ready, PMs are used to install the software, or environment. +Usually the OS's PM, like `\inlinecode{apt}' or `\inlinecode{yum}', is used first and higher-level software are built with more generic PMs like Conda, Nix, GNU Guix or Spack. +The OS PM suffers from the same longevity problem as the OS. +Some third-party tools like Conda and Spack are written in high-level languages like Python, so the PM itself depends on the host's Python installation. +Nix and GNU Guix don't have any dependencies and produce bit-wise identical programs, however, they need root permissions. +Generally the exact version of each software's dependencies isn't precisely identified in the build instructions (although it is possible). +Therefore unless precise versions of \emph{every software} are stored, they will use the most recent version. +Furthermore, because each third party PM introduces its own language and framework, they increase the project's complexity. + +With the software environment built, job management is the next component of a workflow. 
+Visual workflow tools like Apache Taverna, GenePattern, Kepler or VisTrails (mostly introduced in the 2000s and using Java) do encourage modularity and robust job management, but the more recent tools (mostly in Python) leave this to project authors. +Designing a modular project needs to be encouraged and facilitated because scientists (who are not usually trained in data management) will rarely apply best practices in project management and data carpentry. +This includes automatic verification: while it is possible in many solutions, it is rarely practiced. +This leads to many inefficiencies in project cost and/or scientific accuracy (reusing, expanding or validating will be expensive). + +Finally, to add narrative, computational notebooks\cite{rule18}, like Jupyter, are being increasingly used in many solutions. +However, the complex dependency trees of such web-based tools make them very vulnerable to the passage of time, e.g., see Figure 1 of \cite{alliez19} for the dependencies of Matplotlib; one of the more simple Jupyter dependencies. +The longevity of a project is determined by its shortest-lived dependency. +Furthermore, similar to the point above on job management, by not actively encouraging good practices in programming or project management, such tools can rarely deliver their promised potential\cite{rule18} or can even hamper reproducibility \cite{pimentel19}. + +An exceptional solution we encountered was the Image Processing Online Journal (IPOL, \href{https://www.ipol.im}{ipol.im}). +Submitted papers must be accompanied by an ISO C implementation of their algorithm (which is build-able on all operating systems) with example images/data that can also be executed on their webpage. +This is possible due to the focus on low-level algorithms that don't need any dependencies beyond an ISO C compiler. 
+Many data-intensive projects commonly involve dozens of high-level dependencies, with large and complex data formats and analysis, hence this solution isn't scalable. + + + + + +\section{Proposed criteria for longevity} + +The main premise is that starting a project with a robust data management strategy (or tools that provide it) is much more effective, for the researchers and community, than imposing it in the end \cite{austin17,fineberg19}. +Researchers play a critical role\cite{austin17} in making their research more Findable, Accessible, Interoperable, and Reusable (the FAIR principles). +Actively curating workflows for evolving technologies by repositories alone is not practically feasible, or scalable. +In this paper we argue that workflows that satisfy the criteria below can reduce the cost of curation for repositories, while maximizing the FAIRness of the deliverables for future researchers. + +\textbf{Criteria 1: Completeness.} +A project that is complete, or self-contained, has the following properties: +(1) Has no dependency beyond the Portable Operating System (OS) Interface, or POSIX. +IEEE defined POSIX (a minimal Unix-like environment) and many OSs have complied. +It is thus a sufficiently reliable foundation for longevity in execution. +(2) No dependency implies that the project itself must be primarily stored in plain-text: not needing specialized software to open, parse or execute. +(3) Does not affect the host OS (its libraries, programs, or environment). +(4) Does not require root or administrator privileges. +(5) Builds its own controlled software for an independent environment. +(6) Can run locally (without internet connection). +(7) Contains the full project's analysis, visualization \emph{and} narrative: from access to raw inputs to doing the analysis, producing final data products \emph{and} its final published report with figures, e.g., PDF or HTML. +(8) Can run automatically, with no human interaction. 
+ +\textbf{Criteria 2: Modularity.} +A modular project enables and encourages the analysis to be broken into independent modules with well-defined inputs/outputs and minimal side effects. +Explicit communication between various modules enables optimizations on many levels: +(1) Execution in parallel and avoiding redundancies (when a dependency of a module has not changed, it will not be re-run). +(2) Usage in other projects. +(3) Easy to debug and improve. +(4) Facilitates citation of specific parts, +(5) Provenance extraction. + +\textbf{Criteria 3: Minimal complexity.} +Minimal complexity can be interpreted as +(1) avoiding the language or framework that is currently in vogue (for the workflow, not necessarily the high-level analysis). +Because it is going to fall out of fashion soon and require significant resources to translate or rewrite every few years. +More stable/basic tools can also be used with less long-term maintenance. +(2) avoiding too many different languages and frameworks, e.g., when the workflow's PM and analysis are orchestrated in the same framework, it becomes easier to adopt and encourages good practices. + +\textbf{Criteria 4: Scalability.} +A scalable project can easily be used in arbitrarily large and/or complex projects. +On a small scale, the criteria here are trivial to implement, but as the projects get more complex, it can become unsustainable. + +\textbf{Criteria 5: Verifiable inputs and outputs.} +The project should automatically verify its inputs (software source code and data) \emph{and} outputs. +Expert knowledge should not be required to confirm a reproduction, such that ``\emph{a clerk can do it}''\cite{claerbout1992}. + +\textbf{Criteria 6: History and temporal provenance.} No project is done in a single/first attempt. Projects evolve as they are being completed. It is natural that earlier phases of a project are redesigned/optimized only after later phases have been completed. 
This is often seen in exploratory research papers, with statements like ``\emph{we [first] tried method [or parameter] X, but Y is used here because it gave lower random error}''. -A project's ``history'' is thus as scientifically relevant as the final, or published version. +The ``history'' is thus as valuable as the final/published version. -\emph{Comparison with existing:} The solutions above that implement version control usually support this principle. -However, because the systems as a whole are rarely complete (see \ref{principle:complete}), their histories are also incomplete. -IPOL fails here, because only the final snapshot is published. +\textbf{Criteria 7: Including narrative, linked to analysis.} +A project is not just its computational analysis. +A raw plot, figure or table is hardly meaningful alone, even when accompanied by the code that generated it. +A narrative description is also part of the deliverables (defined as ``data article'' in \cite{austin17}): describing the purpose of the computations, and interpretations of the result, possibly with respect to other projects/papers. +This is related to longevity because if a workflow only contains the steps to do the analysis, or generate the plots, in time, it may be separated from its accompanying published paper. +A raw analysis workflow with no context is hardly useful. -\item \label{principle:scalable}\textbf{Scalability:} -A project should be scalable to arbitrarily large and/or complex projects. +\textbf{Criteria 8: Free and open source software:} +Technically, reproducibility (as defined in \cite{fineberg19}) is possible with non-free or non-open-source software (a black box). +This criteria is thus necessary to complement that definition (nature is already a black box). +As free software, others can learn from, modify, and build upon a project. +When the used software are also free, +(1) The lineage can be traced to the implemented algorithms, possibly enabling optimizations on that level. 
+(2) It can be modified to work on a future hardware by others. +(3) A non-free software typically cannot be distributed by others, making it reliant on a single server (even without payments). -\emph{Comparison with existing:} -Most of the more recent solutions above are scalable. -However, IPOL, which uniquely stands out in satisfying most principles, fails here: IPOL is devoted to low-level image processing algorithms that \emph{can be} done with no dependencies beyond an ISO C compiler. -IPOL is thus not scalable to large projects, which commonly involve dozens of high-level dependencies, with complex data formats and analysis. + + + + + + + + + +\section{Proof of concept: Maneage} + +Given the limitations of existing tools with the proposed criteria, it is necessary to show a proof of concept. +The proof presented here has already been tested in previously published papers \cite{akhlaghi19, infante20} and was recently awarded a Research Data Alliance (RDA) adoption grant for implementing the recommendations of the joint RDA and World Data System (WDS) working group on Publishing Data Workflows\cite{austin17} from the researcher perspective to ensure longevity. + +The proof of concept is called Maneage (Managing+Lineage, ending is pronounced like ``Lineage''). +It was developed along with the criteria, as a parallel research project in 5 years for publishing our reproducible research workflows with our research. +Its primordial form was implemented in \cite{akhlaghi15} and later evolved in \href{http://doi.org/10.5281/zenodo.1163746}{zenodo.1163746} and \href{http://doi.org/10.5281/zenodo.1164774}{zenodo.1164774}. + +Technically, the hardest criteria to implement was the completeness criteria (and in particular no dependency beyond POSIX), blended with minimal complexity. +One proposed solution was the Guix Workflow Language (GWL) which is written in the same framework (GNU Guile, an implementation of Scheme) as GNU Guix (a PM). 
+But as natural scientists (astronomers), our background was with languages like Shell, Python, C or Fortran. +Not having any exposure to Lisp/Scheme and their fundamentally different style made it very hard for us to adopt GWL. +Furthermore, the desired solution was meant to be easily understandable/usable by fellow scientists, who generally also haven't had exposure to Lisp/Scheme. + +Inspired by GWL+Guix, a single job management tool was used for both installing software \emph{and} the analysis workflow: Make. +Make is not an analysis language, it is a job manager, deciding when to call analysis programs (written in any language like Shell, Python, Julia or C). +Make is standardized in POSIX and is used in almost all core OS components. +It is thus mature, actively maintained and highly optimized. +Make was recommended by the pioneers of reproducible research\cite{claerbout1992,schwab2000} and many researchers have already had a minimal exposure to it (when building research software). +%However, because they didn't attempt to build the software environment, in 2006 they moved to SCons (Make-simulator in Python which also attempts to manage software dependencies) in a project called Madagascar (\url{http://ahay.org}), which is highly tailored to Geophysics. + +Linking the analysis and narrative was another major design choice. +Literate programming, implemented as Computational Notebooks like Jupyter, is a common solution these days. +However, due to the problems above, our implementation follows a more abstract design: providing a more direct and precise, but modular (not in the same file) connection. + +Assuming that the narrative is typeset in \LaTeX{}, the connection between the analysis and narrative (usually as numbers) is through \LaTeX{} macros that are automatically defined during the analysis. +For example, in the abstract of \cite{akhlaghi19} we say `\emph{... detect the outer wings of M51 down to S/N of 0.25 ...}'. 
+The \LaTeX{} source of the quote above is: `\inlinecode{\small detect the outer wings of M51 down to S/N of \$\textbackslash{}demo\-sf\-optimized\-sn\$}'. +The macro `\inlinecode{\small\textbackslash{}demosfoptimizedsn}' is set during the analysis, and expands to the value `\inlinecode{0.25}' when the PDF output is built. +Such values also depend on the analysis, hence, just like plots, figures or tables, they should also be reproduced. +As a side-effect, these macros act as a quantifiable link between the narrative and analysis, with the granularity of a word in a sentence and an exact analysis command. +This allows accurate provenance tracking \emph{and} automatic updates to the text when any part of the analysis is changed. +Manually typing such numbers in the narrative is prone to errors and discourages experimentation after the first writing of the project. + +The ultimate aim of any project is to produce a report accompanying a dataset with some visualizations, or a research article in a journal, let's call it \inlinecode{paper.pdf}. +Hence the files with the relevant macros of each (modular) step build the core structure (skeleton) of Maneage. +During the software building (configuration) phase, each package is identified by a \LaTeX{} file, containing its official name, version and possible citation. +In the end, they are combined to enable precise software acknowledgement and citation (see the appendices of \cite{akhlaghi19, infante20}, not included here due to the word-limit). +Simultaneously, they act as Make \emph{targets} and \emph{prerequisite}s to allow accurate dependency tracking and optimized execution (parallel, no redundancies), for any complexity (e.g., Maneage also builds Matplotlib if requested, see Figure 1 of \cite{alliez19}). +Dependencies go down to precise versions of the shell, C compiler, and the C library (task 15390) for an exactly reproducible environment. 
+To enable easy and fast relocation of the project without building from source, it is possible to build it in any existing container/VM. +The important factor is that the precise environment isolator is irrelevant: it can always be rebuilt. + +During configuration, only the very high-level choice of which software to build differs between projects. +The Makefiles containing build recipes of each software don't generally change. +However, the analysis will naturally be different from one project to another. +Therefore a design was necessary to satisfy the modularity, scalability and minimal complexity criteria. +To avoid getting too abstract, we will demonstrate it by replicating Figure 1C of \cite{menke20} in Figure \ref{fig:datalineage} (top). +Figure \ref{fig:datalineage} (bottom) is the data lineage graph that produced it (with this whole paper). + +\begin{figure*}[t] + \begin{center} + \includetikz{figure-tools-per-year} + \includetikz{figure-data-lineage} + \end{center} + \vspace{-3mm} + \caption{\label{fig:datalineage} + Top: an enhanced replica of Figure 1C in \cite{menke20}, shown here for demonstrating Maneage. + It shows the ratio of papers mentioning software tools (green line, left vertical axis) to the total number of papers studied in that year (light red bars, right vertical axis in log-scale). + Bottom: Schematic representation of the data lineage, or workflow, to generate the plot above. + Each colored box is a file in the project and the arrows show the dependencies between them. + Green files/boxes are plain-text files that are under version control and in the project source directory. + Blue files/boxes are output files in the build-directory, shown within the Makefile (\inlinecode{*.mk}) where they are defined as a \emph{target}. + For example, \inlinecode{paper.pdf} depends on \inlinecode{project.tex} (in the build directory; generated automatically) and \inlinecode{paper.tex} (in the source directory; written manually). 
+ The solid arrows and full-opacity built boxes are included with this paper's source. + The dashed arrows and low-opacity built boxes show the scalability by adding hypothetical steps to the project. + } +\end{figure*} + +Analysis is orchestrated in a single point of entry (the Makefile \inlinecode{top-make.mk}). +It is only responsible for \inlinecode{include}-ing the modular \emph{subMakefiles} of the analysis, in the desired order, not doing any analysis itself. +This is shown in Figure \ref{fig:datalineage} (bottom) where all the built/blue files are placed over subMakefiles. +A random reader will be able to understand the high-level logic of the project (irrespective of the low-level implementation details) with a simple visual inspection of this file, provided that the subMakefile names are descriptive. +A human-friendly design (that is also optimized for execution) is a critical component of publishing reproducible workflows. + +In all projects \inlinecode{top-make.mk} will first load the subMakefiles \inlinecode{initialize.mk} and \inlinecode{download.mk}, while concluding with \inlinecode{verify.mk} and \inlinecode{paper.mk}. +Project authors add their modular subMakefiles in between (after \inlinecode{download.mk} and before \inlinecode{verify.mk}); in Figure \ref{fig:datalineage} (bottom), the project-specific subMakefiles are \inlinecode{format.mk} \& \inlinecode{demo-plot.mk}. +Except for \inlinecode{paper.mk}, which builds the ultimate target \inlinecode{paper.pdf}, all subMakefiles build at least one file: a \LaTeX{} macro file with the same base-name, see the \inlinecode{.tex} files in each subMakefile of Figure \ref{fig:datalineage}. +The other built files will ultimately (through other files) lead to one of the macro files. + +Irrespective of the number of subMakefiles, their lineage reaches a bottleneck in \inlinecode{verify.mk} to satisfy the verification criteria. 
+All the macro files, plot information and published datasets of the project are verified with their checksums here to automatically ensure exact reproducibility. +Where exact reproducibility is not possible, values can be verified by any statistical means (specified by the project authors). +Finally, having verified quantitative results, the project builds the ultimate target in \inlinecode{paper.mk}. \begin{figure*}[t] \begin{center} \includetikz{figure-branching}\end{center} \vspace{-3mm} - \caption{\label{fig:branching} Harvesting the power of version-control in project management with Maneage. - Maneage is maintained as a core branch, with projects created by branching off it. - (a) shows how projects evolve on their own branch, but can always update their low-level structure by merging with the core branch + \caption{\label{fig:branching} Maneage is a Git branch; projects using Maneage are branched-off of it and apply their customizations. + (a) shows a hypothetical project's history prior to publication. + The low-level structure (in Maneage, shared between all projects) can be updated by merging with Maneage. (b) shows how a finished/published project can be revitalized for new technologies simply by merging with the core branch. - Each Git ``commit'' is shown on their branches as colored ellipses, with their hash printed in them. - The commits are colored based on the team that is working on that branch. - The collaboration and paper icons are respectively made by `mynamepong' and `iconixar' and downloaded from \url{www.flaticon.com}. + Each Git ``commit'' is shown on its branch as a colored ellipse, with its hash printed in it. + The commits are colored based on their branch. + The collaboration and two paper icons are respectively made by `mynamepong' and `iconixar' from \url{www.flaticon.com}.
} \end{figure*} -\item \label{principle:freesoftware}\textbf{Free and open source software:} - Technically, reproducibility \cite{fineberg19} is possible with non-free or non-open-source software (a black box). - This principle is thus necessary to complement that definition (nature is already a black box, we don't need another one): - (1) As a free software, others can learn from, modify, and build upon it. - (2) The lineage can be traced to free software's implemented algorithms, also enabling optimizations on that level. - (3) A free-software package that does not execute on particular hardware can be modified to work on it. - (4) A non-free software project typically cannot be distributed by others, making the whole community reliant on the owner's server (even if the owner does not ask for payments). +To further minimize complexity, the low-level implementation can be separated from the high-level execution through configuration files. +By convention in Maneage, the subMakefiles (and the Python, Julia, C, Fortran, etc., programs that they call for doing the number crunching) only organize the analysis; they don't contain any fixed numbers, settings or parameters. +Parameters are set as Make variables in ``configuration files'' and passed to the respective program (\inlinecode{.conf} files in Figure \ref{fig:datalineage}). +In the demo lineage, \inlinecode{INPUTS.conf} contains URLs and checksums for all imported datasets, enabling exact verification before usage. +As another demo, we report that \cite{menke20} studied $\menkenumpapersdemocount$ papers in $\menkenumpapersdemoyear$ (which isn't in their original plot). +The number \inlinecode{\menkenumpapersdemoyear} is stored in \inlinecode{demo-year.conf}. +The result \inlinecode{\menkenumpapersdemocount} was calculated after generating \inlinecode{columns.txt}. +Both are expanded in the PDF as \LaTeX{} macros.
+This enables the reader to change the value in \inlinecode{demo-year.conf} and automatically update the result, without necessarily knowing how it was generated. +Since a configuration file is a prerequisite of the target that uses it, if it is changed, Make will re-execute the recipe and its descendants. +This encourages testing (without necessarily knowing the implementation details, e.g., by co-authors or future readers), and ensures self-consistency. + +Finally, to satisfy the temporal provenance criteria, version control (currently implemented in Git) plays a defining role in Maneage as shown in Figure \ref{fig:branching}. +In practice, Maneage is a Git branch that contains the shared components, or infrastructure of all projects (e.g., software tarball URLs, build recipes, common subMakefiles and interface script). +Every project starts by branching-off the Maneage branch and customizing it by adding their own title, input data links, writing their narrative, and subMakefiles for their analysis, see Listing \ref{code:branching}. + +\begin{lstlisting}[ + label=code:branching, + caption={Starting new project with Maneage, and building it}, + ] +# Cloning main Maneage branch and branching-off of it. +$ git clone https://git.maneage.org/project.git +$ cd project +$ git remote rename origin origin-maneage +$ git checkout -b master + +# Build the project in two phases: +$ ./project configure # Build software environment. +$ ./project make # Do analysis, build PDF paper. +\end{lstlisting} + +As Figure \ref{fig:branching} shows, due to this architecture, it is always possible to import, or merge, Maneage into the project to improve the low-level infrastructure: +in (a) the authors merge with Maneage during an ongoing project, +in (b) readers can do it after the paper's publication, even when authors can't be accessed, and the project's infrastructure is outdated, or doesn't build. +Low-level improvements in Maneage are thus automatically propagated to all projects.
+This greatly reduces the cost of curation, or maintenance, of each individual project, before and after publication. + + + + + + +\section{Discussion} + +%% It should provide some insight or lessons learned. +%% What is the message we should take from the experience? +%% Are there clear demonstrated design principles that can be reapplied elsewhere? +%% Are there roadblocks or bottlenecks that others might avoid? +%% Are there suggested community or work practices that can make things smoother? +%% Attempt to generalise the significance. +%% should not just present a solution or an enquiry into a unitary problem but make an effort to demonstrate wider significance and application and say something more about the ‘science of data’ more generally. + +As shown in the proof of concept above, it is possible to define a workflow that satisfies the criteria presented in this paper. +Here we will review the lessons learnt and insights gained, while sharing the experience of implementing the RDA recommendations. +We will also discuss the design principles, and how they may be generalized and usable in other projects. + +With the support of RDA, the user base and development of the criteria and Maneage grew phenomenally, highlighting some difficulties for wide-spread adoption of these criteria. +Firstly, the low-level tools are not widely used by many scientists, e.g., Git, \LaTeX, the command-line and Make. +This is primarily because of a lack of exposure; we noticed that after witnessing the improvements in their research, many (especially early career researchers) have started mastering these tools. +Fortunately many research institutes are having courses on these generic tools and we will also be adding more tutorials and demonstration videos in Maneage's documentation.
+ +Secondly, to satisfy the completeness criteria, all the necessary software of the project must be built on various POSIX-compatible systems (we actively test Maneage on several GNU/Linux distributions and macOS). +This requires maintenance by our core team and consumes time and energy. +However, due to the complexity criteria, the PM and analysis share the same job manager. +Our experience has shown that users' experience in the analysis empowers some of them to add/fix their required software on their own systems, and share those commits on the core branch, thus propagating to all derived projects. +This has already happened in multiple cases. + +Thirdly, publishing a project's reproducible data lineage immediately after publication enables others to continue with followup papers in competition with the original authors. +We propose these solutions: +1) Through the Git history, the work added by another team at any phase of the project can be quantified, contributing to a new concept of authorship in scientific projects and helping to quantify Newton's famous ``\emph{standing on the shoulders of giants}'' quote. +This is a long-term goal and requires major changes to academic value systems. +2) Authors can be given a grace period where the journal or a third party embargoes the source, keeping it private for the embargo period and then publishing it. + +Other implementations of the criteria, or future improvements in Maneage, may solve the caveats above. +However, the proof of concept already shows many advantages to adopting the criteria. +Above, the benefits for researchers were the main focus, but these criteria also help in data centers, for example with regard to the challenges mentioned in \cite{austin17}: +(1) The burden of curation is shared among all project authors and/or readers (who may find a bug and fix it), not just by data-base curators, improving the sustainability of data centers.
+(2) Automated and persistent bi-directional linking of data and publication can be established through the published \& \emph{complete} data lineage that is version controlled. +(3) Software management. +With these criteria, each project's unique and complete software management is included: it is not a third-party PM that needs to be maintained by the data center employees. +This enables easy management, preservation, publishing and citation of used software. +For example see \href{https://doi.org/10.5281/zenodo.3524937}{zenodo.3524937}, \href{https://doi.org/10.5281/zenodo.3408481}{zenodo.3408481}, \href{https://doi.org/10.5281/zenodo.1163746}{zenodo.1163746} where we have exploited the free software criteria to distribute all the used software tarballs with the other project files. +(4) ``Linkages between documentation, code, data, and journal articles in an integrated environment'', which results from the criteria. + +Generally, scientists are rarely trained sufficiently in data management or software development, and the plethora of high-level tools that change every few years does not help. +Such high-level tools are primarily targeted at software developers, who are paid to learn them and use them effectively for short-term projects. +Scientists, on the other hand, need to focus on their own research fields, and need to think about longevity. +Hence, arguably the most important feature is that the un-customized project is already a fully working template blending version control, paper's narrative, software management \emph{and} a modular lineage for analysis with mature tools, allowing scientists to learn them in practice, not abstractly. - \emph{Comparison with existing:} The existing solutions listed above are all free software. - Based on this principle, we do not consider non-free solutions.
-\end{enumerate} +Publication of projects with these criteria on a wide scale allows automatic workflow generation, optimized for desired characteristics of the results (for example via machine learning). +Because each project is complete, algorithms and data selection methods can be similarly optimized. +Furthermore, through elements like the macros, natural language processing can also be included, allowing a direct connection between an analysis and the resulting narrative \emph{and} history of that narrative. +Parsers can be written over projects for meta-research and data provenance studies, for example to generate ``research objects''. +As another example, when a bug is found in one software package, all affected projects can be found and the scale of the effect can be measured. +Combined with SoftwareHeritage, precise high-level science parts of Maneage projects can be accurately cited (e.g., failed/abandoned tests at any historical point). +Many components of ``machine-actionable'' data management plans can be automatically filled out by Maneage, which is useful for project PIs and grant funders. @@ -277,14 +491,14 @@ The Pozna\'n Supercomputing and Networking Center (PSNC) computational grant 314 %% Bibliography \bibliographystyle{IEEEtran} -\bibliography{IEEEabrv,/home/mohammad/documents/personal/professional/data-science/maneage/paper/source/tex/src/references} +\bibliography{IEEEabrv,references} %% Biography \begin{IEEEbiographynophoto}{Mohammad Akhlaghi} - is currently a big data postdoctoral researcher at the Instituto de Astrof\'isica de Canarias, Tenerife, Spain. - His main scientific interest is in early galaxy evolution, but to extract information from the modern complex datasets, he has been involved in image processing and reproducible workflow management where he has founded GNU Astronomy Utilities (Gnuastro) and Maneage.
- He received his PhD in astronomy from Tohoku University, Sendai Japan, and also held a postdoc position at the Centre de Recherche Astrophysique de Lyon (CRAL). - Contact him at mohammad@akhlaghi.org and find his website at https://akhlaghi.org. + is currently a postdoctoral researcher at the Instituto de Astrof\'isica de Canarias, Tenerife, Spain. + His main scientific interest is in early galaxy evolution, but to extract information from the modern complex datasets, he has been involved in image processing and reproducible workflow management where he has founded GNU Astronomy Utilities (Gnuastro) and Maneage (introduced here). + He received his PhD in astronomy from Tohoku University, Sendai Japan, and before coming to Tenerife, held a CNRS postdoc position at the Centre de Recherche Astrophysique de Lyon (CRAL). + Contact him at mohammad@akhlaghi.org and find his website at \url{https://akhlaghi.org}. \end{IEEEbiographynophoto} \begin{IEEEbiographynophoto}{Ra\'ul Infante-Sainz} diff --git a/project b/project index efbd266..47cb5ae 100755 --- a/project +++ b/project @@ -406,6 +406,11 @@ EOF # Run the actual project. controlled_env reproduce/analysis/make/top-make.mk + + # Print the number of words + numwords=$(/usr/bin/pdftotext paper.pdf && cat paper.txt | wc -w) + echo; echo "Number of words in full PDF: $numwords" + rm paper.txt ;; diff --git a/reproduce/analysis/config/demo-year.conf b/reproduce/analysis/config/demo-year.conf new file mode 100644 index 0000000..429b220 --- /dev/null +++ b/reproduce/analysis/config/demo-year.conf @@ -0,0 +1,3 @@ +# This is the demonstration year showing the number of papers studied +# before 1997. 
+menke-demo-year = 1996 diff --git a/reproduce/analysis/config/menke-demo-year.conf b/reproduce/analysis/config/menke-demo-year.conf deleted file mode 100644 index 429b220..0000000 --- a/reproduce/analysis/config/menke-demo-year.conf +++ /dev/null @@ -1,3 +0,0 @@ -# This is the demonstration year showing the number of papers studied -# before 1997. -menke-demo-year = 1996 diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk index ac05776..c14b83d 100644 --- a/reproduce/analysis/make/demo-plot.mk +++ b/reproduce/analysis/make/demo-plot.mk @@ -27,7 +27,7 @@ $(a2dir):; mkdir $@ # Table for Figure 1C of Menke+20 # ------------------------------- -a2mk20f1c = $(a2dir)/tools-per-year.txt +a2mk20f1c = $(a2dir)/columns.txt $(a2mk20f1c): $(mk20tab3) | $(a2dir) # Remove the (possibly) produced figure that is created from this @@ -47,7 +47,7 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir) # Final LaTeX macro -$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/menke-demo-year.conf +$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf # Find the first year (first column of first row) of data. v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c)) diff --git a/reproduce/analysis/make/format.mk b/reproduce/analysis/make/format.mk index d10034d..3070e6a 100644 --- a/reproduce/analysis/make/format.mk +++ b/reproduce/analysis/make/format.mk @@ -24,7 +24,7 @@ # Save the "Table 3" spreadsheet from the downloaded `.xlsx' file into a # simple plain-text file that is easy to use. a1dir = $(BDIR)/analysis1 -mk20tab3 = $(a1dir)/menke20-table-3.txt +mk20tab3 = $(a1dir)/table-3.txt $(a1dir):; mkdir $@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index 4f2088b..a216370 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -86,15 +86,21 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex # recipe and the `paper.pdf' recipe. 
But if `tex/src/references.bib' hasn't # been modified, we don't want to re-build the bibliography, only the final # PDF. -$(texbdir)/paper.bbl: tex/src/references.bib $(mtexdir)/dependencies-bib.tex \ +$(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ | $(mtexdir)/project.tex # If `$(mtexdir)/project.tex' is empty, don't build PDF. @macros=$$(cat $(mtexdir)/project.tex) if [ x"$$macros" != x ]; then + # Unfortunately I can't get bibtex to look into a special + # directory for the references, so we'll copy it here. + p=$$(pwd) + if ! [ -L $(texbdir)/references.bib ]; then + ln -s $$p/tex/src/references.tex $(texbdir)/references.bib + fi + # We'll run LaTeX first to generate the `.bcf' file (necessary # for `biber') and then run `biber' to generate the `.bbl' file. - p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir); latex -shell-escape -halt-on-error $$p/paper.tex @@ -137,5 +143,4 @@ paper.pdf: $(mtexdir)/project.tex paper.tex $(texbdir)/paper.bbl # file here. cd $$p cp $(texbdir)/$@ $(final-paper) - fi diff --git a/reproduce/software/config/texlive-packages.conf b/reproduce/software/config/texlive-packages.conf index 70f246e..7dac084 100644 --- a/reproduce/software/config/texlive-packages.conf +++ b/reproduce/software/config/texlive-packages.conf @@ -16,4 +16,6 @@ # the basic installation scheme that we used to install tlmgr, they will be # ignored in the `tlmgr install' command, but will be used later when we # want their versions. 
-texlive-packages = times IEEEtran cite xcolor pgfplots courier ps2eps +texlive-typewriter-pkgs = courier inconsolata xkeyval upquote +texlive-packages = times IEEEtran cite xcolor pgfplots ps2eps \ + listing etoolbox $(texlive-typewriter-pkgs) diff --git a/tex/src/figure-branching.tex b/tex/src/figure-branching.tex index 52a6303..7259f7d 100644 --- a/tex/src/figure-branching.tex +++ b/tex/src/figure-branching.tex @@ -120,9 +120,9 @@ \draw [->, black!40!white, rounded corners, line width=2mm] (11cm,4.5cm) -- (12.5cm,5cm) -- (12.5cm,7.9cm); \draw [black!40!white, line width=2mm] (9.5cm,6cm) -- (12.5cm,7cm); - \draw [anchor=north, black!40!white] (12.7cm,4.8cm) node [scale=1.5] - {\bf Derivative}; - \draw [anchor=north, black!40!white] (12.7cm,4.4cm) node [scale=1.5] + \draw [anchor=north, black!40!white] (12.6cm,4.8cm) node [scale=1.5] + {\bf Derived}; + \draw [anchor=north, black!40!white] (12.6cm,4.4cm) node [scale=1.5] {\bf project}; %% Maneage commits. diff --git a/tex/src/figure-data-lineage.tex b/tex/src/figure-data-lineage.tex index 146a833..fcc52d9 100644 --- a/tex/src/figure-data-lineage.tex +++ b/tex/src/figure-data-lineage.tex @@ -46,9 +46,9 @@ text centered, font=\ttfamily, text width=2.8cm, + minimum width=15cm, minimum height=7.8cm, draw=green!50!black!50, - minimum width=\linewidth, fill=black!10!green!2!white, label={[shift={(0,-5mm)}]\texttt{top-make.mk}}] {}; @@ -62,7 +62,7 @@ \node (analysis2mk) [node-makefile, at={(2.67cm,-1.3cm)}, label={[shift={(0,-5mm)}]\texttt{demo-plot.mk}}] {}; \node [opacity=0.6] (analysis3mk) [node-makefile, at={(5.47cm,-1.3cm)}, - label={[shift={(0,-5mm)}, opacity=0.6]\texttt{another-step.mk}}] {}; + label={[shift={(0,-5mm)}, opacity=0.6]\texttt{next-step.mk}}] {}; %% verify.mk \node [at={(-5.3cm,-2.8cm)}, @@ -137,14 +137,16 @@ %% input-2.dat \ifdefined\inputtwo \node (input2) [node-terminal, at={(-2.93cm,1.9cm)}] {menke20.xlsx}; - \draw [->] (input2) -- (downloadtex); \fi %% INPUTS.conf \ifdefined\inputsconf \node 
(INPUTS) [node-nonterminal, at={(-2.93cm,4.6cm)}] {INPUTS.conf}; \node (input2-west) [node-point, at={(-4.33cm,1.9cm)}] {}; + \node (downloadtex-west) [node-point, at={(-4.33cm,-0.8cm)}] {}; \draw [->,rounded corners] (INPUTS.west) -| (input2-west) |- (input2); + \draw [->,rounded corners] (INPUTS.west) -| (downloadtex-west) + |- (downloadtex); \fi %% analysis1.tex @@ -155,7 +157,7 @@ %% out1b.dat \ifdefined\outoneb - \node (out1b) [node-terminal, at={(-0.13cm,1.1cm)}] {menke20-table-3.txt}; + \node (out1b) [node-terminal, at={(-0.13cm,1.1cm)}] {table-3.txt}; \draw [->] (out1b) -- (a1tex); \fi @@ -173,9 +175,9 @@ %% out-2b.dat \ifdefined\outtwob - \node (menkedemoyear) [node-nonterminal, at={(2.67cm,4.6cm)}] {menke-demo-year.conf}; + \node (menkedemoyear) [node-nonterminal, at={(2.67cm,4.6cm)}] {demo-year.conf}; \node (a2tex-west) [node-point, at={(1.27cm,-0.8cm)}] {}; - \node (out2b) [node-terminal, at={(2.67cm,0.3cm)}] {tools-per-year.txt}; + \node (out2b) [node-terminal, at={(2.67cm,0.3cm)}] {columns.txt}; \draw [->] (out2b) -- (a2tex); \draw [->,rounded corners] (menkedemoyear.west) -| (a2tex-west) |- (a2tex); \fi @@ -187,7 +189,7 @@ %% analysis3.tex \ifdefined\analysisthreetex - \node [opacity=0.6] (a3tex) [node-terminal, at={(5.47cm,-0.8cm)}] {another-step.tex}; + \node [opacity=0.6] (a3tex) [node-terminal, at={(5.47cm,-0.8cm)}] {next-step.tex}; \draw [opacity=0.6, rounded corners, -, dashed] (a3tex) |- (initialize-south); \fi @@ -216,7 +218,7 @@ \ifdefined\outthreeadep \node [opacity=0.6] (out3a-west) [node-point, at={(4.07cm,2.7cm)}] {}; \draw [opacity=0.6, ->,rounded corners, dashed] (input2) |- (out3a); - \node [opacity=0.6] (a3conf1) [node-nonterminal, at={(5.47cm,4.6cm)}] {param-3.conf}; + \node [opacity=0.6] (a3conf1) [node-nonterminal, at={(5.47cm,4.6cm)}] {param.conf}; \draw [opacity=0.6, rounded corners, dashed] (a3conf1.west) -| (out3a-west) |- (out3a); \fi \end{tikzpicture} diff --git a/tex/src/figure-tools-per-year.tex 
b/tex/src/figure-tools-per-year.tex index 75557ac..f82402f 100644 --- a/tex/src/figure-tools-per-year.tex +++ b/tex/src/figure-tools-per-year.tex @@ -1,4 +1,4 @@ -\begin{tikzpicture} +\begin{tikzpicture}[scale=0.9] \begin{axis}[ ymin=0, ymax=100, diff --git a/tex/src/preamble-project.tex b/tex/src/preamble-project.tex index c4d7feb..9b956cf 100644 --- a/tex/src/preamble-project.tex +++ b/tex/src/preamble-project.tex @@ -8,11 +8,9 @@ %% For the `\url' command. \usepackage{url} -% correct bad hyphenation here -\hyphenation{op-tical net-works semi-conduc-tor} - -%% To use colors. -\usepackage{xcolor} +%% No need to load xcolor, it is included by others below (it conflicts with +%% the listings package). +%\usepackage{xcolor} %% To have links. \usepackage[ @@ -25,3 +23,32 @@ %% To have typewriter font \usepackage{courier} + +%% To have bold monospace +%\usepackage[scaled=0.85]{beramono} +\usepackage{inconsolata} + +%% To display codes. +\usepackage{listings} +\usepackage{etoolbox} +\input{listings-bash.prf} +\lstset{ + frame=lines, + numbers=none, + language=bash, + commentstyle=\color{gray}, + abovecaptionskip=0mm, + belowcaptionskip=0mm, + keywordstyle=\mdseries, + basicstyle=\small\ttfamily\color{blue!35!black}, +} +\makeatletter +\preto\lstlisting{\def\@captype{table}} +\pretocmd\lst@makecaption{\noindent{\rule{\linewidth}{1pt}}}{}{} +\makeatother + + + + +%% Custom macros +\newcommand{\inlinecode}[1]{\textcolor{blue!35!black}{\texttt{#1}}} diff --git a/tex/src/references.bib b/tex/src/references.bib deleted file mode 100644 index e19ec16..0000000 --- a/tex/src/references.bib +++ /dev/null @@ -1,1772 +0,0 @@ -@ARTICLE{clement19, - author = {Cl\'ement-Fontaine, M\'elanie and Di Cosmo, Roberto and Guerry, Bastien and MOREAU, Patrick and Pellegrini, Fran\c cois}, - title = {Encouraging a wider usage of software derived from research}, - year = {2019}, - journal = {Archives ouvertes HAL}, - volume = {}, - pages =
{\href{https://hal.archives-ouvertes.fr/hal-02545142}{hal-02545142}}, -} - - - - - -@ARTICLE{dicosmo20, - author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, - title = "{Referencing Source Code Artifacts: a Separate Concern in Software Citation}", - journal = {Computing in Science \& Engineering}, - year = 2020, - volume = 22, - eid = {arXiv:2001.08647}, - pages = {33}, -archivePrefix = {arXiv}, - eprint = {2001.08647}, - primaryClass = {cs.DL}, - doi = {10.1109/MCSE.2019.2963148}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200108647D}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{menke20, - author = {Joe Menke and Martijn Roelandse and Burak Ozyurt and Maryann Martone and Anita Bandrowski}, - title = {Rigor and Transparency Index, a new metric of quality for assessing biological and medical science methods}, - year = {2020}, - journal = {bioRxiv}, - volume = {}, - pages = {2020.01.15.908111}, - doi = {10.1101/2020.01.15.908111}, -} - - - - - -@ARTICLE{konkol20, - author = {{Konkol}, Markus and {N{\"u}st}, Daniel and {Goulier}, Laura}, - title = "{Publishing computational research -- A review of infrastructures for reproducible and transparent scholarly communication}", - journal = {arXiv}, - year = 2020, - month = jan, - pages = {2001.00484}, -archivePrefix = {arXiv}, - eprint = {2001.00484}, - primaryClass = {cs.DL}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200100484K}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{infante20, - author = {{Infante-Sainz}, Ra{\'u}l and {Trujillo}, Ignacio and - {Rom{\'a}n}, Javier}, - title = "{The Sloan Digital Sky Survey extended point spread functions}", - journal = {Monthly Notices of the Royal Astronomical Society}, - keywords = {instrumentation: detectors, methods: data analysis, techniques: image processing, techniques: photometric, galaxies: haloes, Astrophysics - Instrumentation and 
Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies}, - year = "2020", - month = "Feb", - volume = {491}, - number = {4}, - pages = {5317}, - doi = {10.1093/mnras/stz3111}, -archivePrefix = {arXiv}, - eprint = {1911.01430}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.491.5317I}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{gibney20, - author = {Elizabeth Gibney}, - title = {This AI researcher is trying to ward off a reproducibility crisis}, - year = {2020}, - journal = {Nature}, - volume = {577}, - pages = {14}, - doi = {10.1038/d41586-019-03895-5}, -} - - - - - -@ARTICLE{pimentel19, - author = {{Jo\~ao Felipe} Pimentel and Leonardo Murta and Vanessa Braganholo and Juliana Freire}, - title = {A large-scale study about quality and reproducibility of jupyter notebooks}, - year = {2019}, - journal = {Proceedings of the 16th International Conference on Mining Software Repositories}, - volume = {1}, - pages = {507}, - doi = {10.1109/MSR.2019.00077}, -} - - - - - -@ARTICLE{miksa19a, - author = {Tomasz Miksa and Paul Walk and Peter Neish}, - title = {RDA DMP Common Standard for Machine-actionable Data Management Plans}, - year = {2019}, - journal = {RDA}, - pages = {doi:10.15497/rda00039}, - doi = {10.15497/rda00039}, -} - - - - - -@ARTICLE{miksa19b, - author = {Tomasz Miksa and Stephanie Simms and Daniel Mietchen and Sarah Jones}, - title = {Ten principles for machine-actionable data management plans}, - year = {2019}, - journal = {PLoS Computational Biology}, - volume = {15}, - pages = {e1006750}, - doi = {10.1371/journal.pcbi.1006750}, -} - - - - - -@ARTICLE{dicosmo19, - author = {Roberto {Di Cosmo} and Francois Pellegrini}, - title = {Encouraging a wider usage of software derived from research}, - year = {2019}, - journal = {\doihref{https://www.ouvrirlascience.fr/wp-content/uploads/2020/02/Opportunity-Note_software-derived-from-research_EN.pdf}{Ouvrir la science}}, - 
volume = {}, - pages = {}, - doi = {}, -} - - - - - -@ARTICLE{perignon19, - author = {Christophe P\'erignon and Kamel Gadouche and Christophe Hurlin and Roxane Silberman and Eric Debonnel}, - title = {Certify reproducibility with confidential data}, - year = {2019}, - journal = {Science}, - volume = {365}, - pages = {127}, - doi = {10.1126/science.aaw2825}, -} - - - - - -@ARTICLE{munafo19, - author = {Marcus Munaf\'o}, - title = {Raising research quality will require collective action}, - year = {2019}, - journal = {Nature}, - volume = {576}, - pages = {183}, - doi = {10.1038/d41586-019-03750-7}, -} - - - - - -@ARTICLE{jones19, - author = {{Jones}, M.~G. and {Verdes-Montenegro}, L. and {Damas-Segovia}, A. and - {Borthakur}, S. and {Yun}, M. and {del Olmo}, A. and {Perea}, J. and - {Rom{\'a}n}, J. and {Luna}, S. and {Lopez Gutierrez}, D. and - {Williams}, B. and {Vogt}, F.~P.~A. and {Garrido}, J. and - {Sanchez}, S. and {Cannon}, J. and {Ram{\'\i}rez-Moreta}, P.}, - title = "{Evolution of compact groups from intermediate to final stages. A case study of the H I content of HCG 16}", - journal = {Astronomy \& Astrophysics}, - eprint = {1910.03420}, - keywords = {galaxies: groups: individual: HCG 16, galaxies: interactions, galaxies: evolution, galaxies: ISM, radio lines: galaxies}, - year = "2019", - month = "Dec", - volume = {632}, - eid = {A78}, - pages = {A78}, - doi = {10.1051/0004-6361/201936349}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019A&A...632A..78J}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{banek19, - author = {{Banek}, Christine and {Thornton}, Adam and {Economou}, Frossie and - {Fausti}, Angelo and {Krughoff}, K. 
Simon and {Sick}, Jonathan}, - title = "{Why is the LSST Science Platform built on Kubernetes?}", - journal = {Proceedings of ADASS XXIX}, - volume = {arXiv}, - keywords = {Astrophysics - Instrumentation and Methods for Astrophysics}, - year = "2019", - month = "Nov", - eid = {arXiv:1911.06404}, - pages = {1911.06404}, -archivePrefix = {arXiv}, - eprint = {1911.06404}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv191106404B}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{fineberg19, - author = {Harvey V. Fineberg and David B. Allison and Lorena A. Barba and Dianne Chong and David L. Donoho and Juliana Freire and Gerald Gabrielse and Constantine Gatsonis and Edward Hall and Thomas H. Jordan and Dietram A. Scheufele and Victoria Stodden and Simine Vazire, Timothy D. Wilson and Wendy Wood and Jennifer Heimberg and Thomas Arrison and Michael Cohen and Michele Schwalbe and Adrienne Stith Butler and Barbara A. Wanchisen and Tina Winters and Rebecca Morgan and Thelma Cox and Lesley Webb and Garret Tyson and Erin Hammers Forstag}, - title = {Reproducibility and Replicability in Science}, - journal = {The National Academies Press}, - year = 2019, - pages = {1}, - doi = {10.17226/25303}, -} - - - - - -@ARTICLE{akhlaghi19, - author = {{Akhlaghi}, Mohammad}, - title = "{Carving out the low surface brightness universe with NoiseChisel}", - journal = {IAU Symposium 355}, - volume = {arXiv}, - keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies, Computer Science - Computer Vision and Pattern Recognition}, - year = "2019", - month = "Sep", - eid = {arXiv:1909.11230}, - pages = {1909.11230}, -archivePrefix = {arXiv}, - eprint = {1909.11230}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190911230A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{cribbs19, - 
author = {Cribbs, AP and Luna-Valero, S and George, C and Sudbery, IM and Berlanga-Taylor, AJ and Sansom, SN and Smith, T and Ilott, NE and Johnson, J and Scaber, J and Brown, K and Sims, D and Heger, A}, - title = {CGAT-core: a python framework for building scalable, reproducible computational biology workflows [version 2; peer review: 1 approved, 1 approved with reservations]}, - journal = {F1000Research}, - year = 2019, - volume = 8, - pages = {377}, - doi = {10.12688/f1000research.18674.2}, -} - - - - - -@ARTICLE{brinckman19, -author = "Adam Brinckman and Kyle Chard and Niall Gaffney and Mihael Hategan and Matthew B. Jones and Kacper Kowalik and Sivakumar Kulasekaran and Bertram Ludäscher and Bryce D. Mecum and Jarek Nabrzyski and Victoria Stodden and Ian J. Taylor and Matthew J. Turk and Kandace Turner", - title = {Computing environments for reproducibility: Capturing the ``Whole Tale''}, - journal = {Future Generation Computer Systems}, - year = 2019, - volume = 94, - pages = 854, - doi = {10.1016/j.future.2017.12.029}, -} - - - - - -@ARTICLE{uhse19, - author = {Uhse, Simon and Pflug, Florian G. 
and {von Haeseler}, Arndt and Djamei, Armin}, - title = {Insertion Pool Sequencing for Insertional Mutant Analysis in Complex Host‐Microbe Interactions}, - journal = {Current Protocols in Plant Biology}, - volume = {4}, - year = "2019", - month = "July", - pages = {e20097}, - doi = {10.1002/cppb.20097}, -} - - - - - -@ARTICLE{alliez19, - author = {{Alliez}, Pierre and {Di Cosmo}, Roberto and {Guedj}, Benjamin and - {Girault}, Alain and {Hacid}, Mohand-Said and {Legrand}, Arnaud and - {Rougier}, Nicolas P.}, - title = "{Attributing and Referencing (Research) Software: Best Practices and Outlook from Inria}", - journal = {Computing in Science \& Engineering}, - volume = {22}, - keywords = {Computer Science - Digital Libraries, Computer Science - Software Engineering}, - year = "2019", - month = "May", - pages = {39}, -archivePrefix = {arXiv}, - eprint = {1905.11123}, - primaryClass = {cs.DL}, - doi = {10.1109/MCSE.2019.2949413}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190511123A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{kneller19, - author = {Kneller,Gerald R. 
and Hinsen,Konrad}, - title = {Memory effects in a random walk description of protein structure ensembles}, - journal = {The Journal of Chemical Physics}, - volume = {150}, - year = {2019}, - pages = {064911}, - doi = {10.1063/1.5054887}, -} - - - - - -@article{tange18, - author = {Tange, Ole}, - title = {GNU Parallel 2018}, - Journal = {Zenodo}, - volume = {1146014}, - pages = {\href{https://doi.org/10.5281/zenodo.1146014}{DOI:10.5281/zenodo.1146014}}, - year = 2018, - ISBN = {9781387509881}, - doi = {10.5281/zenodo.1146014}, - url = {https://doi.org/10.5281/zenodo.1146014} -} - - - - - -@ARTICLE{rule18, - author = {Adam Rule and Aur\'elien Tabard and {James D.} Hollan}, - title = {Exploration and Explanation in Computational Notebooks}, - journal = {Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems}, - volume = {1}, - year = {2018}, - pages = {30}, - doi = {10.1145/3173574.3173606}, -} - - - - - -@ARTICLE{plesser18, - author = {Hans E. Plesser}, - title = {Reproducibility vs. Replicability: A Brief History of a Confused Terminology}, - journal = {Frontiers in Neuroinformatics}, - volume = {11}, - year = {2018}, - pages = {76}, - doi = {10.3389/fninf.2017.00076}, -} - - - - - -@ARTICLE{zhang18, - author = {{Zhang}, Zhi-Yu and {Romano}, D. and {Ivison}, R.~J. and - {Papadopoulos}, Padelis P. 
and {Matteucci}, F.}, - title = "{Stellar populations dominated by massive stars in dusty starburst galaxies across cosmic time}", - journal = {Nature}, - keywords = {Astrophysics - Astrophysics of Galaxies}, - year = "2018", - month = "Jun", - volume = {558}, - number = {7709}, - pages = {260}, - doi = {10.1038/s41586-018-0196-x}, -archivePrefix = {arXiv}, - eprint = {1806.01280}, - primaryClass = {astro-ph.GA}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2018Natur.558..260Z}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{smart18, - author = {{Smart}, A.G.}, - title = {The war over supercooled water}, - journal = {Physics Today}, - volume = {Aug}, - year = "2018", - pages = {DOI:10.1063/PT.6.1.20180822a}, - doi = {10.1063/PT.6.1.20180822a}, -} - - - - - -@ARTICLE{kaiser18, - author = {{Kaiser}, J.}, - title = {Plan to replicate 50 high-impact cancer papers shrinks to just 18}, - journal = {Science}, - volume = {Jul}, - year = "2018", - pages = {31}, - doi = {10.1126/science.aau9619}, -} - - - - - -@ARTICLE{dicosmo18, - author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, - title = {Identifiers for Digital Objects: The case of software source code preservation}, - journal = {Proceedings of iPRES 2018}, - year = "2018", - pages = {204.4}, - doi = {10.17605/osf.io/kde56}, -} - - - - - -@ARTICLE{gruning18, - author = {Gr\"uning, Bj\"orn and Chilton, John and K\"oster, Johannes and Dale, Ryan and Soranzo, Nicola and {van den Beek}, Marius and Goecks, Jeremy and Backofen, Rolf and Nekrutenko, Anton and Taylor, James}, - title = {Practical Computational Reproducibility in the Life Sciences}, - journal = {Cell Systems}, - volume = 6, - year = "2018", - pages = {631. bioRxiv:\href{https://www.biorxiv.org/content/10.1101/200683v2}{200683}}, - doi = {10.1016/j.cels.2018.03.014}, -} - - - - - -@ARTICLE{allen18, - author = {{Allen}, Alice and {Teuben}, Peter J. and {Ryan}, P. 
Wesley}, - title = "{Schroedinger's Code: A Preliminary Study on Research Source Code Availability and Link Persistence in Astrophysics}", - journal = {The Astrophysical Journal Supplement Series}, - keywords = {methods: numerical, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Digital Libraries}, - year = "2018", - month = "May", - volume = {236}, - number = {1}, - eid = {10}, - pages = {10}, - doi = {10.3847/1538-4365/aab764}, -archivePrefix = {arXiv}, - eprint = {1801.02094}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2018ApJS..236...10A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{burrell18, - author = {{Burrell}, A.G. and {Halford}, A. and {Klenzing}, J. and {Stoneback}, R.A. and {Morley}, S.K. and {Annex}, A.M. and {Laundal}, K.M. and {Kellerman}, A.C. and {Stansby}, D. and {Ma}, J.}, - title = {Snakes on a Spaceship—An Overview of Python in Heliophysics}, - journal = {Journal of Geophysical Research: Space Physics}, - volume = {123}, - year = "2018", - pages = {384}, - doi = {10.1029/2018JA025877}, -} - - - - - -@article{stodden18, - author = {{Stodden}, V. and {Seiler}, J. 
and {Ma}, Z.}, - title = {An empirical analysis of journal policy effectiveness for computational reproducibility}, - volume = {115}, - number = {11}, - pages = {2584}, - year = {2018}, - doi = {10.1073/pnas.1708290115}, - issn = {0027-8424}, - URL = {https://www.pnas.org/content/115/11/2584}, - journal = {Proceedings of the National Academy of Sciences} -} - - - - - -@article {fanelli18, - author = {{Fanelli}, D.}, - title = {Opinion: Is science really facing a reproducibility crisis, and do we need it to?}, - volume = {115}, - number = {11}, - pages = {2628}, - year = {2018}, - doi = {10.1073/pnas.1708272114}, - publisher = {National Academy of Sciences}, - issn = {0027-8424}, - URL = {https://www.pnas.org/content/115/11/2628}, - journal = {Proceedings of the National Academy of Sciences} -} - - - - - - -@ARTICLE{lewis18, - author = {{Lewis}, L.M. and {Edwards}, M.C. and {Meyers}, Z.R. and {Conover Talbot}, C. and {Hao}, H. and {Blum}, D. }, - title = "{Replication Study: Transcriptional amplification in tumor cells with elevated c-Myc}", - journal = {eLife}, - volume = {7}, - year = "2018", - month = "January", - pages = {e30274}, - doi = {10.7554/eLife.30274}, -} - - - - - -@ARTICLE{akhlaghi18b, - author = {{Akhlaghi}, Mohammad and {Bacon}, Roland and {Inami}, Hanae}, - title = "{MUSE HUDF survey I \& II, Sections 7.3 \& 3.4: photometry for objects with no prior broad-band segmentation map}", - journal = {Zenodo}, - pages = {DOI:10.5281/zenodo.1164774}, - year = "2018", - month = "February", - doi = {10.5281/zenodo.1164774}, -} - - - - - -@ARTICLE{akhlaghi18a, - author = {{Akhlaghi}, Mohammad and {Bacon}, Roland}, - title = "{MUSE HUDF survey I, Section 4: data and reproduction pipeline for photometry and astrometry}", - journal = {Zenodo}, - pages = {DOI:10.5281/zenodo.1163746}, - year = "2018", - month = "January", - doi = {10.5281/zenodo.1163746}, -} - - - - - -@ARTICLE{leek17, - author = {Jeffrey T. Leek and Leah R. 
Jager}, - title = {Is Most Published Research Really False?}, - journal = {Annual Review of Statistics and Its Application}, - volume = {4}, - year = {2017}, - pages = {109}, - doi = {10.1146/annurev-statistics-060116-054104}, -} - - - - - -@ARTICLE{reich17, - author = {Michael Reich and Thorin Tabor and Ted Liefeld and Helga Thorvaldsdóttir and Barbara Hill and Pablo Tamayo and Jill P. Mesirov}, - title = {The GenePattern Notebook Environment}, - journal = {Cell Systems}, - year = {2017}, - volume = {5}, - pages = {149}, - doi = {10.1016/j.cels.2017.07.003}, -} - - - - - -@ARTICLE{becker17, - author = {Gabriel Becker and Cory Barr and Robert Gentleman and Michael Lawrence}, - title = {Enhancing Reproducibility and Collaboration via Management of R Package Cohorts}, - journal = {Journal of Statistical Software, Articles}, - volume = {82}, - pages = 1, - year = "2017", -archivePrefix = {arXiv}, - eprint = {1501.02284}, - doi = {10.18637/jss.v082.i01}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150102284B}, -} - - - - - -@ARTICLE{jenness17, - author = {{Jenness}, Tim}, - title = "{Modern Python at the Large Synoptic Survey Telescope}", - journal = {ADASS 27}, - year = "2017", - month = "Dec", - eid = {arXiv:1712.00461}, - pages = {arXiv:1712.00461}, -archivePrefix = {arXiv}, - eprint = {1712.00461}, - primaryClass = {astro-ph.IM}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2017arXiv171200461J}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@article{bezanson17, - title={Julia: A fresh approach to numerical computing}, - author={Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B}, - journal={SIAM {R}eview}, - volume={59}, - number={1}, - pages={65}, - year={2017}, - archivePrefix={arXiv}, - eprint={1411.1607}, - publisher={SIAM}, - doi={10.1137/141000671}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2014arXiv1411.1607B}, -} - - - - - -@ARTICLE{melson17, - author = {{Melsen}, L.A. 
and {Torfs}, P.J.J.F and {Uijlenhoet}, R. and {Teuling}, A.J.}, - title = {Comment on “Most computational hydrology is not reproducible, so is it really science?” by Christopher Hutton et al.}, - journal = {Water Resources Research}, - volume = 53, - pages = {2568}, - year = {2017}, - doi = {10.1002/2016WR020208}, -} - - - - - -@ARTICLE{munafo17, - author = {{Munaf\'o}, M.R. and {Nosek}, B.A. and {Bishop}, D.V.M. and {Button}, K.S. and {Chambers}, C.D. and {Percie du Sert}, N. and {Simonsohn}, U. and {Wagenmakers}, E.J. and {Ware}, J.J. and {Ioannidis}, J.P.A.}, - title = {A manifesto for reproducible science}, - journal = {Nature Human Behaviour}, - volume = 1, - pages = {21}, - year = {2017}, - doi = {10.1038/s41562-016-0021}, -} - - - - - -@ARTICLE{jimenez17, - title={The popper convention: Making reproducible systems evaluation practical}, - author = {{Jimenez}, I. and {Sevilla}, M. and {Watkins}, N. and {Maltzahn}, C. and {Lofstead}, J. and {Mohror}, K. and {Arpaci-Dusseau}, A. and {Arpaci-Dusseau}, R.}, - journal = {IEEE IPDPSW}, - pages = {1561}, - year = {2017}, - doi = {10.1109/IPDPSW.2017.157}, -} - - - - - -@ARTICLE{bacon17, - author = {{Bacon}, Roland and {Conseil}, Simon and {Mary}, David and - {Brinchmann}, Jarle and {Shepherd}, Martin and {Akhlaghi}, Mohammad and - {Weilbacher}, Peter M. and {Piqueras}, Laure and {Wisotzki}, Lutz and - {Lagattuta}, David and {Epinat}, Benoit and {Guerou}, Adrien and - {Inami}, Hanae and {Cantalupo}, Sebastiano and - {Courbot}, Jean Baptiste and {Contini}, Thierry and {Richard}, Johan and - {Maseda}, Michael and {Bouwens}, Rychard and {Bouch{\'e}}, Nicolas and - {Kollatschny}, Wolfram and {Schaye}, Joop and {Marino}, Raffaella Anna and - {Pello}, Roser and {Herenz}, Christian and {Guiderdoni}, Bruno and - {Carollo}, Marcella}, - title = "{The MUSE Hubble Ultra Deep Field Survey. I.
Survey description, data reduction, and source detection}", - journal = {Astronomy \& Astrophysics}, - keywords = {galaxies: distances and redshifts, galaxies: high-redshift, cosmology: observations, methods: data analysis, techniques: imaging spectroscopy, galaxies: formation, Astrophysics - Astrophysics of Galaxies}, - year = "2017", - month = "Nov", - volume = {608}, - eid = {A1}, - pages = {A1}, - doi = {10.1051/0004-6361/201730833}, -archivePrefix = {arXiv}, - eprint = {1710.03002}, - primaryClass = {astro-ph.GA}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2017A\&A...608A...1B}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{austin17, - author = {{Claire C.} Austin and Theodora Bloom and Sünje Dallmeier-Tiessen and {Varsha K.} Khodiyar and Fiona Murphy and Amy Nurnberger and Lisa Raymond and Martina Stockhause and Jonathan Tedds and Mary Vardigan and Angus Whyte}, - title = {Key components of data publishing: using current best practices to develop a reference model for data publishing}, - journal = {International Journal on Digital Libraries}, - volume = {18}, - year = {2017}, - pages = {77}, - doi = {10.1007/s00799-016-0178-2}, -} - - - - - -@ARTICLE{smith16, - author = {Arfon M. Smith and Daniel S. Katz and Kyle E. 
Niemeyer}, - title = {Software citation principles}, - journal = {PeerJ Computer Science}, - volume = {2}, - year = {2016}, - pages = {e86}, - doi = {10.7717/peerj-cs.86}, -} - - - - - -@ARTICLE{ziemann16, - author = {Mark Ziemann and Yotam Eren and Assam El-Osta}, - title = {Gene name errors are widespread in the scientific literature}, - journal = {Genome Biology}, - volume = {17}, - year = {2016}, - pages = {177}, - doi = {10.1186/s13059-016-1044-7}, -} - - - - - -@ARTICLE{hinsen16, - author = {Konrad Hinsen}, - title = {Scientific notations for the digital era}, - journal = {The Self Journal of Science}, - year = {2016}, - pages = {1: arXiv:\href{https://arxiv.org/abs/1605.02960}{1605.02960}}, -} - - - - - -@ARTICLE{kluyver16, - author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing}, - title = "{Jupyter Notebooks – a publishing format for reproducible computational workflows}", - journal = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, - year = {2016}, - pages = {87}, - doi = {10.3233/978-1-61499-649-1-87}, -} - - - - - -@ARTICLE{baker16, - author = {{Baker}, M.}, - title = "{Is there a reproducibility crisis?}", - journal = {Nature}, - volume = {533}, - year = "2016", - month = "May", - pages = {452}, - doi = {10.1038/533452a}, -} - - - - - -@ARTICLE{wilkinson16, - author = { {Wilkinson}, M.D and {Dumontier}, M. and {Aalbersberg}, I.J. and {Appleton}, G. and {Axton}, M. and {Baak}, A. and {Blomberg}, N. and {Boiten}, J. and {da Silva Santos}, L.B and {Bourne}, P.E. and {Bouwman}, J. and {Brookes}, A.J. and {Clark}, T. and {Crosas}, M. and {Dillo}, I. and {Dumon}, O. and {Edmunds}, S. and {Evelo}, C. and {Finkers}, R. and {Gonzalez-Beltran}, A. and {Gray}, A.J.G. and {Groth}, P. and {Goble}, C. 
and {Grethe}, Jeffrey S. and {Heringa}, J. and {’t Hoen}, P.A.C and {Hooft}, R. and {Kuhn}, T. and {Kok}, R. and {Kok}, J. and {Lusher}, S. and {Martone}, M. and {Mons}, A. and {Packer}, A. and {Persson}, B. and {Rocca-Serra}, P. and {Roos}, M. and {van Schaik}, R. and {Sansone}, S. and {Schultes}, E. and {Sengstag}, T. and {Slater}, T. and {Strawn}, G. and {Swertz}, M. and {Thompson}, M. and {van der Lei}, J. and {van Mulligen}, E. and {Velterop}, J. and {Waagmeester}, A. and {Wittenburg}, P. and {Wolstencroft}, K. and {Zhao}, J. and {Mons}, B.}, - title = "{The FAIR Guiding Principles for scientific data management and stewardship}", - journal = {Scientific Data}, - year = 2016, - month = mar, - volume = 3, - pages = {160018}, - doi = {10.1038/sdata.2016.18}, -} - - - - -@ARTICLE{hutton16, - author = {{Hutton}, C. and {Wagener}, T. and {Freer}, J. and {Han}, D. and {Duffy}, C. and {Arheimer}, B.}, - title = {Most computational hydrology is not reproducible, so is it really science?}, - journal = {Water Resources Research}, - year = {2016}, - volume = 52, - pages = {7548}, - doi = {10.1002/2016WR019285}, -} - - - - - -@ARTICLE{topalidou16, - author = {{Topalidou}, M. and {Leblois}, A. and {Boraud}, T. and {Rougier}, N.P.}, - title = {A long journey into reproducible computational neuroscience}, - journal = {Frontiers in Computational Neuroscience}, - year = {2016}, - volume = 9, - pages = {30}, - doi = {10.3389/fncom.2015.00030}, -} - - - - - -@ARTICLE{gil16, - author = {{Gil}, Yolanda and {David}, C.H. and {Demir}, I. and {Essawy}, B.T. and {Fulweiler}, R.W. and {Goodall}, J.L. and {Karlstrom}, L. and {Lee}, H. and {Mills}, H.J. and {Oh}, J. and {Pierce}, S.A. and {Pope}, A. and {Tzeng}, M.W. and {Villamizar}, S.R. 
and {Yu}, X}, - title = {Toward the Geoscience Paper of the Future: Best practices for documenting and sharing research from data to software to provenance}, - journal = {Earth and Space Science}, - year = 2016, - volume = 3, - pages = {388}, - doi = {10.1002/2015EA000136}, -} - - - - - -@ARTICLE{romine15, - author = {Charles H. Romine}, - title = {Secure Hash Standard (SHS)}, - journal = {Federal Information processing standards publication}, - volume = {180}, - pages = {4}, - year = {2015}, - doi = {10.6028/NIST.FIPS.180-4}, -} - - - - - -@ARTICLE{horvath15, - author = {Steve Horvath}, - title = {Erratum to: DNA methylation age of human tissues and cell types}, - journal = {Genome Biology}, - volume = {16}, - pages = {96}, - year = {2015}, - doi = {10.1186/s13059-015-0649-6}, -} - - - - - -@ARTICLE{chang15, - author = {Andrew C. Chang and Phillip Li}, - title = {Is Economics Research Replicable? Sixty Published Papers from Thirteen Journals Say ``Usually Not''}, - journal = {Finance and Economics Discussion Series 2015-083}, - year = {2015}, - pages = {1}, - doi = {10.17016/FEDS.2015.083}, -} - - - - - -@ARTICLE{schaffer15, - author = {Jonathan Schaffer}, - title = {What Not to Multiply Without Necessity}, - journal = {Australasian Journal of Philosophy}, - volume = {93}, - pages = {644}, - year = {2015}, - doi = {10.1080/00048402.2014.992447}, -} - - - - - -@ARTICLE{clarkso15, - author = "Chris Clarkson and Mike Smith and Ben Marwick and Richard Fullagar and Lynley A. Wallis and Patrick Faulkner and Tiina Manne and Elspeth Hayes and Richard G. Roberts and Zenobia Jacobs and Xavier Carah and Kelsey M. Lowe and Jacqueline Matthews and S. 
Anna Florin", - title = {The archaeology, chronology and stratigraphy of Madjedbebe (Malakunanja II): A site in northern Australia with early occupation}, - journal = {Journal of Human Evolution}, - year = 2015, - volume = 83, - pages = 46, - doi = {10.1016/j.jhevol.2015.03.014}, -} - - - - - -@ARTICLE{meng15, - author = {Haiyan Meng and Rupa Kommineni and Quan Pham and Robert Gardner and Tanu Malik and Douglas Thain}, - title = {An invariant framework for conducting reproducible computational science}, - journal = {Journal of Computational Science}, - year = 2015, - volume = 9, - pages = 137, - doi = {10.1016/j.jocs.2015.04.012}, -} - - - - - -@ARTICLE{gamblin15, - author = {Gamblin, Todd and LeGendre, Matthew and Collette, Michael R. and Lee, Gregory L. and Moody, Adam and {de Supinski}, Bronis R. and Futral, Scott}, - title = {The Spack package manager: bringing order to HPC software chaos}, - journal = {IEEE SC15}, - year = 2015, - volume = 1, - pages = {1}, - doi = {10.1145/2807591.2807623}, -} - - - - -@ARTICLE{akhlaghi15, - author = {{Akhlaghi}, M. 
and {Ichikawa}, T.}, - title = "{Noise-based Detection and Segmentation of Nebulous Objects}", - journal = {The Astrophysical Journal Supplement Series}, - archivePrefix = "arXiv", - eprint = {1505.01664}, - primaryClass = "astro-ph.IM", - keywords = {galaxies: irregular, galaxies: photometry, galaxies: structure, methods: data analysis, techniques: image processing, techniques: photometric}, - year = 2015, - month = sep, - volume = 220, - eid = {1}, - pages = {1}, - doi = {10.1088/0067-0049/220/1/1}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015ApJS..220....1A}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{courtes15, - author = {{Court{\'e}s}, Ludovic and {Wurmus}, Ricardo}, - title = {Reproducible and User-Controlled Software Environments in HPC with Guix}, - journal = {Euro-Par}, - volume = {9523}, - keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Operating Systems, Computer Science - Software Engineering}, - year = {2015}, - month = {Jun}, - eid = {arXiv:1506.02822}, - pages = {arXiv:1506.02822}, -archivePrefix = {arXiv}, - eprint = {1506.02822}, - primaryClass = {cs.DC}, - doi = {10.1007/978-3-319-27308-2_47}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150602822C}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{hinsen15, - author = {{Hinsen}, K.}, - title = {ActivePapers: a platform for publishing and archiving computer-aided research}, - journal = {F1000Research}, - year = 2015, - volume = 3, - pages = {289}, - doi = {10.12688/f1000research.5773.3}, -} - - - - - -@ARTICLE{belhajjame15, - author = {{Belhajjame}, K. and {Zhao}, Z. and {Garijo}, D. and {Gamble}, M. and {Hettne}, K. and {Palma}, R. and {Mina}, E. and {Corcho}, O. and {Gómez-Pérez}, J.M. and {Bechhofer}, S. and {Klyne}, G. 
and {Goble}, C}, - title = "{Using a suite of ontologies for preserving workflow-centric research objects}", - journal = {Journal of Web Semantics}, - year = 2015, - volume = 32, - pages = {16}, - doi = {10.1016/j.websem.2015.01.003}, -} - - - - - -@ARTICLE{bechhofer13, - author = {{Bechhofer}, S. and {Buchan}, I. and {De Roure}, D. and {Missier}, P. and {Ainsworth}, J. and {Bhagat}, J. and Couch, P. and Cruickshank, D. and {Delderfield}, M and Dunlop, I. and {Gamble}, M. and {Michaelides}, D. and {Owen}, S. and {Newman}, D. and {Sufi}, S. and {Goble}, C}, - title = "{Why linked data is not enough for scientists}", - journal = {Future Generation Computer Systems}, - year = 2013, - volume = 29, - pages = {599}, - doi = {10.1016/j.future.2011.08.004}, -} - - - - - -@ARTICLE{peng15, - author = {{Peng}, R.D.}, - title = {The reproducibility crisis in science: A statistical counterattack}, - journal = {Significance}, - year = 2015, - month = jun, - volume = 12, - pages = {30}, - doi = {10.1111/j.1740-9713.2015.00827.x}, -} - - - - - -@ARTICLE{katz14, - author = {Daniel S. Katz}, - title = {Transitive Credit as a Means to Address Social and Technological Concerns Stemming from Citation and Attribution of Digital Products}, - journal = {Journal of Open Research Software}, - year = {2014}, - volume = {2}, - pages = {e20}, - doi = {10.5334/jors.be}, -} - - - - - -@ARTICLE{herndon14, - author = {Thomas Herndon and Michael Ash and Robert Pollin}, - title = {Does high public debt consistently stifle economic growth? 
A critique of Reinhart and Rogoff}, - journal = {Cambridge Journal of Economics}, - year = {2014}, - month = {dec}, - volume = {38}, - pages = {257}, - doi = {10.1093/cje/bet075}, -} - - - - - -@ARTICLE{easterbrook14, - author = {{Easterbrook}, S.}, - title = {Open code for open science?}, - journal = {Nature Geoscience}, - year = 2014, - month = oct, - volume = 7, - pages = {779}, - doi = {10.1038/ngeo2283}, -} - - - - - -@ARTICLE{fomel13, - author = {Sergey Fomel and Paul Sava and Ioan Vlad and Yang Liu and Vladimir Bashkardin}, - title = {Madagascar: open-source software project for multidimensional data analysis and reproducible computational experiments}, - journal = {Journal of open research software}, - year = {2013}, - volume = {1}, - pages = {e8}, - doi = {10.5334/jors.ag}, -} - - - - - -@ARTICLE{sandve13, - author = {{Sandve}, G.K. and {Nekrutenko}, A. and {Taylor}, J. and {Hovig}, E.}, - title = {Ten Simple Rules for Reproducible Computational Research}, - journal = {PLoS Computational Biology}, - year = 2013, - month = oct, - volume = 9, - pages = {e1003285}, - doi = {10.1371/journal.pcbi.1003285}, -} - - - - - -@ARTICLE{malik13, - author = {Tanu Malik and Quan Pham and Ian Foster}, - title = {SOLE: Towards Descriptive and Interactive Publications}, - journal = {Implementing Reproducible Research}, - year = 2013, - volume = {Chapter 2}, - pages = {1. URL: \url{https://osf.io/ns2m3}}, -} - - - - - -@ARTICLE{gronenschild12, - author = {Ed H. B. M. Gronenschild and Petra Habets and Heidi I. L.
Jacobs and Ron Mengelers and Nico Rozendaal and Jim van Os and Machteld Marcelis}, - title = {The Effects of FreeSurfer Version, Workstation Type, and Macintosh Operating System Version on Anatomical Volume and Cortical Thickness Measurements}, - journal = {PLoS ONE}, - volume = {7}, - year = {2012}, - pages = {e38234}, - doi = {10.1371/journal.pone.0038234}, -} - - - - - -@ARTICLE{pham12, - author = {Quan Pham and Tanu Malik and Ian Foster and Roberto {Di Lauro} and Raffaele Montella}, - title = {SOLE: Linking Research Papers with Science Objects}, - journal = {Provenance and Annotation of Data and Processes (IPAW)}, - year = {2012}, - pages = {203}, - doi = {10.1007/978-3-642-34222-6_16}, -} - - - - - -@ARTICLE{davison12, - author = {Andrew Davison}, - title = {Automated Capture of Experiment Context for Easier Reproducibility in Computational Research}, - journal = {Computing in Science \& Engineering}, - volume = {14}, - year = {2012}, - pages = {48}, - doi = {10.1109/MCSE.2012.41}, -} - - - - - -@ARTICLE{zhao12, - author = {Jun Zhao and Jose Manuel Gomez-Perez and Khalid Belhajjame and Graham Klyne and Esteban Garcia-Cuesta and Aleix Garrido and Kristina Hettne and Marco Roos and David {De Roure} and Carole Goble}, - title = {Why workflows break — Understanding and combating decay in Taverna workflows}, - journal = {IEEE 8th International Conference on E-Science}, - year = {2012}, - pages = {1}, - doi = {10.1109/eScience.2012.6404482}, -} - - - - -@ARTICLE{vangorp11, - author = {Pieter {Van Gorp} and Steffen Mazanek}, - title = {SHARE: a web portal for creating and sharing executable research}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {589}, - doi = {10.1016/j.procs.2011.04.062}, -} - - - - - -@ARTICLE{hinsen11, - author = {{Hinsen}, Konrad}, - title = {A data and code model for reproducible research and executable papers}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {579}, - doi = 
{10.1016/j.procs.2011.04.061}, -} - - - - - -@ARTICLE{limare11, - author = {Nicolas Limare and Jean-Michel Morel}, - title = {The IPOL Initiative: Publishing and Testing Algorithms on Line for -Reproducible Research in Image Processing}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {716}, - doi = {10.1016/j.procs.2011.04.075}, -} - - - - - -@ARTICLE{gavish11, - author = {Matan Gavish and David L. Donoho}, - title = {A Universal Identifier for Computational Results}, - journal = {Procedia Computer Science}, - year = 2011, - volume = 4, - pages = {637}, - doi = {10.1016/j.procs.2011.04.067}, -} - - - - -@ARTICLE{gabriel11, - author = {Ann Gabriel and Rebecca Capone}, - title = {Executable Paper Grand Challenge Workshop}, - journal = {Procedia Computer Science}, - volume = {4}, - year = {2011}, - pages = {577}, - doi = {10.1016/j.procs.2011.04.060}, -} - - - - - -@ARTICLE{nowakowski11, - author = {Piotr Nowakowski and Eryk Ciepiela and Daniel Har\k{e}\.{z}lak and Joanna Kocot and Marek Kasztelnik and Tomasz Barty\'nski and Jan Meizner and Grzegorz Dyk and Maciej Malawski}, - title = {The Collage Authoring Environment}, - journal = {Procedia Computer Science}, - volume = {4}, - year = {2011}, - pages = {608}, - doi = {10.1016/j.procs.2011.04.064}, -} - - - - - -@ARTICLE{peng11, - author = {{Peng}, R.D.}, - title = {Reproducible Research in Computational Science}, - journal = {Science}, - year = {2011}, - month = dec, - volume = 334, - pages = {1226}, - doi = {10.1126/science.1213847}, -} - - - - - -@ARTICLE{gil10, - author = {Yolanda Gil and Pedro A.
González-Calero and Jihie Kim and Joshua Moody and Varun Ratnakar}, - title = {A semantic framework for automatic generation of computational workflows using distributed data and component catalogues}, - journal = {Journal of Experimental \& Theoretical Artificial Intelligence}, - year = {2010}, - volume = {23}, - pages = {389}, - doi = {10.1080/0952813X.2010.490962}, -} - - - - - -@ARTICLE{pence10, - author = {{Pence}, W.~D. and {Chiappetti}, L. and {Page}, C.~G. and {Shaw}, R.~A. and - {Stobie}, E.}, - title = "{Definition of the Flexible Image Transport System (FITS), version 3.0}", - journal = {Astronomy and Astrophysics}, - keywords = {instrumentation: miscellaneous, methods: miscellaneous, techniques: miscellaneous, reference systems, standards, astronomical databases: miscellaneous}, - year = "2010", - month = "Dec", - volume = {524}, - eid = {A42}, - pages = {A42}, - doi = {10.1051/0004-6361/201015362}, - adsurl = {https://ui.adsabs.harvard.edu/abs/2010A\&A...524A..42P}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - - - - - -@ARTICLE{goecks10, - author = {Jeremy Goecks and Anton Nekrutenko and James Taylor}, - title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences}, - journal = {Genome Biology}, - year = {2010}, - volume = {11}, - pages = {R86}, - doi = {10.1186/gb-2010-11-8-r86}, -} - - - - - -@ARTICLE{merali10, - author = {Zeeya Merali}, - title = {Computational science: ...Error}, - journal = {Nature}, - year = 2010, - volume = 467, - pages = {775}, - doi = {10.1038/467775a}, -} - - - - - -@ARTICLE{casadevall10, - author = {{Casadevall}, A. 
and {Fang}, F.C}, - title = {Reproducible Science}, - journal = {Infection and Immunity}, - year = 2010, - volume = 78, - pages = {4972}, - doi = {10.1128/IAI.00908-10}, -} - - - - - -@ARTICLE{mesirov10, - author = {{Mesirov}, J.P.}, - title = {Accessible Reproducible Research}, - journal = {Science}, - year = 2010, - volume = 327, - pages = {415}, - doi = {10.1126/science.1179653}, -} - - - - - -@ARTICLE{cheney09, - author = {James Cheney and Laura Chiticariu and Wang-Chiew Tan}, - title = {Provenance in Databases: Why, How, and Where}, - journal = {Foundations and Trends in Databases}, - year = {2009}, - volume = {1}, - pages = {379}, - doi = {10.1561/1900000006}, -} - - - - - -@ARTICLE{ioannidis2009, - author = {John P. A. Ioannidis and David B. Allison and Catherine A. Ball and Issa Coulibaly and Xiangqin Cui and Aedín C Culhane and Mario Falchi and Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion and Tapan Mehta and Michael Nitzberg and Grier P. Page and Enrico Petretto and Vera {van Noort}}, - title = {Repeatability of published microarray gene expression analyses}, - journal = {Nature Genetics}, - year = {2009}, - volume = {41}, - pages = {149}, - doi = {10.1038/ng.295}, -} - - - - - -@ARTICLE{fomel09, - author = {Sergey Fomel and Jon F. Claerbout}, - title = {Reproducible Research}, - journal = {Computing in Science \& Engineering}, - year = {2009}, - volume = {11}, - pages = {5}, - doi = {10.1109/MCSE.2009.14}, -} - - - - - -@ARTICLE{baggerly09, - author = {Keith A.
Baggerly and Kevin R Coombes}, - title = {Deriving chemosensitivity from cell lines: Forensic bioinformatics and reproducible research in high-throughput biology}, - journal = {The Annals of Applied Statistics}, - year = {2009}, - volume = {3}, - pages = {1309}, - doi = {10.1214/09-AOAS291}, -} - - - - - -@ARTICLE{scheidegger08, - author = {Carlos Scheidegger and David Koop and Emanuele Santos and Huy Vo and Steven Callahan and Juliana Freire and Cláudio Silva}, - title = {Tackling the Provenance Challenge one layer at a time}, - journal = {Concurrency and Computation: Practice and Experience}, - year = {2008}, - volume = {20}, - pages = {473}, - doi = {10.1002/cpe.1237}, -} - - - - - -@ARTICLE{moreau08, - author = {Moreau, Luc and Ludäscher, Bertram and Altintas, Ilkay and Barga, Roger S. and Bowers, Shawn and Callahan, Steven and Chin JR., George and Clifford, Ben and Cohen, Shirley and Cohen-Boulakia, Sarah and Davidson, Susan and Deelman, Ewa and Digiampietri, Luciano and Foster, Ian and Freire, Juliana and Frew, James and Futrelle, Joe and Gibson, Tara and Gil, Yolanda and Goble, Carole and Golbeck, Jennifer and Groth, Paul and Holland, David A. and Jiang, Sheng and Kim, Jihie and Koop, David and Krenek, Ales and McPhillips, Timothy and Mehta, Gaurang and Miles, Simon and Metzger, Dominic and Munroe, Steve and Myers, Jim and Plale, Beth and Podhorszki, Norbert and Ratnakar, Varun and Santos, Emanuele and Scheidegger, Carlos and Schuchardt, Karen and Seltzer, Margo and Simmhan, Yogesh L. and Silva, Claudio and Slaughter, Peter and Stephan, Eric and Stevens, Robert and Turi, Daniele and Vo, Huy and Wilde, Mike and Zhao, Jun and Zhao, Yong}, - title = {The First Provenance Challenge}, - journal = {Concurrency and Computation: Practice and Experience}, - year = {2008}, - volume = {20}, - pages = {409}, - doi = {10.1002/cpe.1233}, -} - - - - - -@Article{matplotlib2007, - Author = {Hunter, J.
D.}, - Title = {Matplotlib: A 2D graphics environment}, - Journal = {CiSE}, - Volume = {9}, - Number = {3}, - Pages = {90}, - abstract = {Matplotlib is a 2D graphics package used for Python - for application development, interactive scripting, and - publication-quality image generation across user - interfaces and operating systems.}, - publisher = {IEEE COMPUTER SOC}, - doi = {10.1109/MCSE.2007.55}, - year = 2007 -} - - - - - -@ARTICLE{witten2007, - author = {Ben Witten and Bill Curry and Jeff Shragge}, - title = {A New Build Environment for SEP}, - journal = {Stanford Exploration Project}, - year = {2007}, - volume = {129}, - pages = {247: \url{http://sepwww.stanford.edu/data/media/public/docs/sep129/ben1.pdf}}, -} - - - - - -@ARTICLE{miller06, - author = {Greg Miller}, - title = {A Scientist's Nightmare: Software Problem Leads to Five Retractions}, - journal = {Science}, - year = {2006}, - volume = {314}, - pages = {1856}, - doi = {10.1126/science.314.5807.1856}, -} - - - - - -@ARTICLE{reich06, - author = {Michael Reich and Ted Liefeld and Joshua Gould and Jim Lerner and Pablo Tamayo and Jill P Mesirov}, - title = {GenePattern 2.0}, - journal = {Nature Genetics}, - year = {2006}, - volume = {38}, - pages = {500}, - doi = {10.1038/ng0506-500}, -} - - - - - -@ARTICLE{ludascher05, - author = {Ludäs\-cher, Bertram and Altintas, Ilkay and Berkley, Chad and Higgins, Dan and Jaeger, Efrat and Jones, Matthew and Lee, Edward A. and Tao, Jing and Zhao, Yang}, - title = {Scientific workflow management and the Kepler system}, - journal = {Concurrency Computation: Practice and Experiment}, - year = {2006}, - volume = {18}, - pages = {1039}, - doi = {10.1002/cpe.994}, -} - - - - - -@ARTICLE{ioannidis05, - author = {John P. A. 
Ioannidis}, - title = {Why Most Published Research Findings Are False}, - journal = {PLoS Medicine }, - year = {2005}, - volume = {2}, - pages = {e124}, - doi = {10.1371/journal.pmed.0020124}, -} - - - - - -@ARTICLE{bavoil05, - author = {Louis Bavoil and Steven P. Callahan and Patricia J. Crossno and Juliana Freire and Carlos E. Scheidegger and Cláudio T. Silva and Huy T. Vo}, - title = {VisTrails: Enabling Interactive Multiple-View Visualizations}, - journal = {VIS 05. IEEE Visualization}, - year = {2005}, - volume = {}, - pages = {135}, - doi = {10.1109/VISUAL.2005.1532788}, -} - - - - - -@ARTICLE{dolstra04, - author = {{Dolstra}, Eelco and {de Jonge}, Merijn and {Visser}, Eelco}, - title = {Nix: A Safe and Policy-Free System for Software Deployment}, - journal = {Large Installation System Administration Conference}, - year = {2004}, - volume = {18}, - pages = {79. \url{https://www.usenix.org/legacy/events/lisa04/tech/full_papers/dolstra/dolstra.pdf}}, -} - - - - - -@ARTICLE{oinn04, - author = {Oinn, Tom and Addis, Matthew and Ferris, Justin and Marvin, Darren and Senger, Martin and Greenwood, Mark and Carver, Tim and Glover, Kevin and Pocock, Matthew R. and Wipat, Anil and Li, Peter}, - title = {Taverna: a tool for the composition and enactment of bioinformatics workflows}, - journal = {Bioinformatics}, - year = {2004}, - volume = {20}, - pages = {3045}, - doi = {10.1093/bioinformatics/bth361}, -} - - - - - -@ARTICLE{schwab2000, - author = {Matthias Schwab and Martin Karrenbach and Jon F. Claerbout}, - title = {Making scientific computations reproducible}, - journal = {Computing in Science \& Engineering}, - year = {2000}, - volume = {2}, - pages = {61}, - doi = {10.1109/5992.881708}, -} - - - - - -@ARTICLE{buckheit1995, - author = {Jonathan B. Buckheit and David L. 
Donoho}, - title = {WaveLab and Reproducible Research}, - journal = {Wavelets and Statistics}, - year = {1995}, - volume = {1}, - pages = {55}, - doi = {10.1007/978-1-4612-2544-7\_5}, -} - - - - - -@ARTICLE{claerbout1992, - author = {Jon F. Claerbout and Martin Karrenbach}, - title = {Electronic documents give reproducible research a new meaning}, - journal = {SEG Technical Program Expanded Abstracts}, - year = {1992}, - volume = {1}, - pages = {601}, - doi = {10.1190/1.1822162}, -} - - - - - -@ARTICLE{eker03, - author = {Johan Eker and Jorn W Janneck and Edward A. Lee and Jie Liu and Xiaojun Liu and Jozsef Ludvig and Sonia Sachs and Yuhong Xiong and Stephen Neuendorffer}, - title = {Taming heterogeneity - the Ptolemy approach}, - journal = {Proceedings of the IEEE}, - year = {2003}, - volume = {91}, - pages = {127}, - doi = {10.1109/JPROC.2002.805829}, -} - - - - - -@ARTICLE{stevens03, - author = {Robert Stevens and Kevin Glover and Chris Greenhalgh and Claire Jennings and Simon Pearce and Peter Li and Melena Radenkovic and Anil Wipat}, - title = {Performing in silico Experiments on the Grid: A Users Perspective}, - journal = {Proceedings of UK e-Science All Hands Meeting}, - year = {2003}, - pages = {43}, -} - - - - - -@ARTICLE{knuth84, - author = {Donald Knuth}, - title = {Literate Programming}, - journal = {The Computer Journal}, - year = {1984}, - volume = {27}, - pages = {97}, - doi = {10.1093/comjnl/27.2.97}, -} - - - - - -@ARTICLE{stallman88, - author = {Richard M. Stallman and Roland McGrath and Paul D. Smith}, - title = {GNU Make: a program for directing recompilation}, - journal = {Free Software Foundation}, - year = {1988}, - pages = {ISBN:1-882114-83-3. 
\url{https://www.gnu.org/s/make/manual/make.pdf}}, -} - - - - - -@ARTICLE{somogyi87, - author = {Zoltan Somogyi}, - title = {Cake: a fifth generation version of make}, - journal = {University of Melbourne}, - year = {1987}, - pages = {1: \url{https://pdfs.semanticscholar.org/3e97/3b5c9af7763d70cdfaabdd1b96b3b75b5483.pdf}}, -} - - - - - -@ARTICLE{feldman79, - author = {Stuart I. Feldman}, - title = {Make -- a program for maintaining computer programs}, - journal = {Journal of Software: Practice and Experience}, - volume = {9}, - pages = {255}, - year = {1979}, - doi = {10.1002/spe.4380090402}, -} - - - - - -@ARTICLE{mcilroy78, - author = {M. D. McIlroy and E. N. Pinson and B. A. Tague}, - title = {UNIX Time-Sharing System: Forward}, - journal = {\doihref{https://archive.org/details/bstj57-6-1899/mode/2up}{Bell System Technical Journal}}, - year = {1978}, - volume = {57}, - pages = {6, ark:/13960/t0gt6xf72}, - doi = {}, -} - - - - - -@ARTICLE{anscombe73, - author = {{Anscombe}, F.J.}, - title = {Graphs in Statistical Analysis}, - journal = {The American Statistician}, - year = {1973}, - volume = {27}, - pages = {17}, - doi = {10.1080/00031305.1973.10478966}, -} - - - - - -@ARTICLE{roberts69, - author = {{Roberts}, K.V.}, - title = {The publication of scientific fortran programs}, - journal = {Computer Physics Communications}, - year = {1969}, - volume = {1}, - pages = {1}, - doi = {10.1016/0010-4655(69)90011-3}, -} diff --git a/tex/src/references.tex b/tex/src/references.tex new file mode 100644 index 0000000..f355bba --- /dev/null +++ b/tex/src/references.tex @@ -0,0 +1,1786 @@ +@ARTICLE{mesnard20, + author = {Olivier Mesnard and Lorena A. 
Barba}, + title = {Reproducible Workflow on a Public Cloud for Computational Fluid Dynamics}, + year = {2020}, + journal = {Computing in Science \& Engineering}, + volume = {22}, + pages = {102-116}, + doi = {10.1109/MCSE.2019.2941702}, +} + + + + + +@ARTICLE{dicosmo20, + author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, + title = "{Referencing Source Code Artifacts: a Separate Concern in Software Citation}", + journal = {Computing in Science \& Engineering}, + year = 2020, + volume = 22, + eid = {arXiv:2001.08647}, + pages = {33}, +archivePrefix = {arXiv}, + eprint = {2001.08647}, + primaryClass = {cs.DL}, + doi = {10.1109/MCSE.2019.2963148}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200108647D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{menke20, + author = {Joe Menke and Martijn Roelandse and Burak Ozyurt and Maryann Martone and Anita Bandrowski}, + title = {Rigor and Transparency Index, a new metric of quality for assessing biological and medical science methods}, + year = {2020}, + journal = {bioRxiv}, + volume = {}, + pages = {2020.01.15.908111}, + doi = {10.1101/2020.01.15.908111}, +} + + + + + +@ARTICLE{konkol20, + author = {{Konkol}, Markus and {N{\"u}st}, Daniel and {Goulier}, Laura}, + title = "{Publishing computational research -- A review of infrastructures for reproducible and transparent scholarly communication}", + journal = {arXiv}, + year = 2020, + month = jan, + pages = {2001.00484}, +archivePrefix = {arXiv}, + eprint = {2001.00484}, + primaryClass = {cs.DL}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200100484K}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{infante20, + author = {{Infante-Sainz}, Ra{\'u}l and {Trujillo}, Ignacio and + {Rom{\'a}n}, Javier}, + title = "{The Sloan Digital Sky Survey extended point spread functions}", + journal = {Monthly Notices of the Royal Astronomical Society}, + 
keywords = {instrumentation: detectors, methods: data analysis, techniques: image processing, techniques: photometric, galaxies: haloes, Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies}, + year = "2020", + month = "Feb", + volume = {491}, + number = {4}, + pages = {5317-5329}, + doi = {10.1093/mnras/stz3111}, +archivePrefix = {arXiv}, + eprint = {1911.01430}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.491.5317I}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{gibney20, + author = {Elizabeth Gibney}, + title = {This AI researcher is trying to ward off a reproducibility crisis}, + year = {2020}, + journal = {Nature}, + volume = {577}, + pages = {14}, + doi = {10.1038/d41586-019-03895-5}, +} + + + + + +@ARTICLE{clement19, + author = {Cl\'ement-Fontaine, M\'elanie and Di Cosmo, Roberto and Guerry, Bastien and Moreau, Patrick and Pellegrini, Fran\c cois}, + title = {Encouraging a wider usage of software derived from research}, + year = {2019}, + journal = {Archives ouvertes HAL}, + volume = {}, + pages = {\href{https://hal.archives-ouvertes.fr/hal-02545142}{hal-02545142}}, +} + + + + + +@ARTICLE{pimentel19, + author = {{Jo\~ao Felipe} Pimentel and Leonardo Murta and Vanessa Braganholo and Juliana Freire}, + title = {A large-scale study about quality and reproducibility of jupyter notebooks}, + year = {2019}, + journal = {Proceedings of the 16th International Conference on Mining Software Repositories}, + volume = {1}, + pages = {507-517}, + doi = {10.1109/MSR.2019.00077}, +} + + + + + +@ARTICLE{miksa19a, + author = {Tomasz Miksa and Paul Walk and Peter Neish}, + title = {RDA DMP Common Standard for Machine-actionable Data Management Plans}, + year = {2019}, + journal = {RDA}, + pages = {doi:10.15497/rda00039}, + doi = {10.15497/rda00039}, +} + + + + + +@ARTICLE{miksa19b, + author = {Tomasz Miksa and Stephanie Simms and Daniel Mietchen
and Sarah Jones}, + title = {Ten principles for machine-actionable data management plans}, + year = {2019}, + journal = {PLoS Computational Biology}, + volume = {15}, + pages = {e1006750}, + doi = {10.1371/journal.pcbi.1006750}, +} + + + + + +@ARTICLE{dicosmo19, + author = {Roberto {Di Cosmo} and Francois Pellegrini}, + title = {Encouraging a wider usage of software derived from research}, + year = {2019}, + journal = {\doihref{https://www.ouvrirlascience.fr/wp-content/uploads/2020/02/Opportunity-Note_software-derived-from-research_EN.pdf}{Ouvrir la science}}, + volume = {}, + pages = {}, + doi = {}, +} + + + + + +@ARTICLE{perignon19, + author = {Christophe P\'erignon and Kamel Gadouche and Christophe Hurlin and Roxane Silberman and Eric Debonnel}, + title = {Certify reproducibility with confidential data}, + year = {2019}, + journal = {Science}, + volume = {365}, + pages = {127}, + doi = {10.1126/science.aaw2825}, +} + + + + + +@ARTICLE{munafo19, + author = {Marcus Munaf\'o}, + title = {Raising research quality will require collective action}, + year = {2019}, + journal = {Nature}, + volume = {576}, + pages = {183}, + doi = {10.1038/d41586-019-03750-7}, +} + + + + + +@ARTICLE{jones19, + author = {{Jones}, M.~G. and {Verdes-Montenegro}, L. and {Damas-Segovia}, A. and + {Borthakur}, S. and {Yun}, M. and {del Olmo}, A. and {Perea}, J. and + {Rom{\'a}n}, J. and {Luna}, S. and {Lopez Gutierrez}, D. and + {Williams}, B. and {Vogt}, F.~P.~A. and {Garrido}, J. and + {Sanchez}, S. and {Cannon}, J. and {Ram{\'\i}rez-Moreta}, P.}, + title = "{Evolution of compact groups from intermediate to final stages. 
A case study of the H I content of HCG 16}", + journal = {Astronomy \& Astrophysics}, + eprint = {1910.03420}, + keywords = {galaxies: groups: individual: HCG 16, galaxies: interactions, galaxies: evolution, galaxies: ISM, radio lines: galaxies}, + year = "2019", + month = "Dec", + volume = {632}, + eid = {A78}, + pages = {A78}, + doi = {10.1051/0004-6361/201936349}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019A&A...632A..78J}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{banek19, + author = {{Banek}, Christine and {Thornton}, Adam and {Economou}, Frossie and + {Fausti}, Angelo and {Krughoff}, K. Simon and {Sick}, Jonathan}, + title = "{Why is the LSST Science Platform built on Kubernetes?}", + journal = {Proceedings of ADASS XXIX}, + volume = {arXiv}, + keywords = {Astrophysics - Instrumentation and Methods for Astrophysics}, + year = "2019", + month = "Nov", + eid = {arXiv:1911.06404}, + pages = {1911.06404}, +archivePrefix = {arXiv}, + eprint = {1911.06404}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv191106404B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{fineberg19, + author = {Harvey V. Fineberg and David B. Allison and Lorena A. Barba and Dianne Chong and David L. Donoho and Juliana Freire and Gerald Gabrielse and Constantine Gatsonis and Edward Hall and Thomas H. Jordan and Dietram A. Scheufele and Victoria Stodden and Simine Vazire and Timothy D. Wilson and Wendy Wood and Jennifer Heimberg and Thomas Arrison and Michael Cohen and Michele Schwalbe and Adrienne Stith Butler and Barbara A.
Wanchisen and Tina Winters and Rebecca Morgan and Thelma Cox and Lesley Webb and Garret Tyson and Erin Hammers Forstag}, + title = {Reproducibility and Replicability in Science}, + journal = {The National Academies Press}, + year = 2019, + pages = {1-256}, + doi = {10.17226/25303}, +} + + + + + +@ARTICLE{akhlaghi19, + author = {{Akhlaghi}, Mohammad}, + title = "{Carving out the low surface brightness universe with NoiseChisel}", + journal = {IAU Symposium 355}, + volume = {arXiv}, + keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Astrophysics - Astrophysics of Galaxies, Computer Science - Computer Vision and Pattern Recognition}, + year = "2019", + month = "Sep", + eid = {arXiv:1909.11230}, + pages = {1909.11230}, +archivePrefix = {arXiv}, + eprint = {1909.11230}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190911230A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{cribbs19, + author = {Cribbs, AP and Luna-Valero, S and George, C and Sudbery, IM and Berlanga-Taylor, AJ and Sansom, SN and Smith, T and Ilott, NE and Johnson, J and Scaber, J and Brown, K and Sims, D and Heger, A}, + title = {CGAT-core: a python framework for building scalable, reproducible computational biology workflows [version 2; peer review: 1 approved, 1 approved with reservations]}, + journal = {F1000Research}, + year = 2019, + volume = 8, + pages = {377}, + doi = {10.12688/f1000research.18674.2}, +} + + + + + +@ARTICLE{brinckman19, +author = "Adam Brinckman and Kyle Chard and Niall Gaffney and Mihael Hategan and Matthew B. Jones and Kacper Kowalik and Sivakumar Kulasekaran and Bertram Ludäscher and Bryce D. Mecum and Jarek Nabrzyski and Victoria Stodden and Ian J. Taylor and Matthew J. 
Turk and Kandace Turner", + title = {Computing environments for reproducibility: Capturing the ``Whole Tale''}, + journal = {Future Generation Computer Systems}, + year = 2019, + volume = 94, + pages = 854, + doi = {10.1016/j.future.2017.12.029}, +} + + + + + +@ARTICLE{uhse19, + author = {Uhse, Simon and Pflug, Florian G. and {von Haeseler}, Arndt and Djamei, Armin}, + title = {Insertion Pool Sequencing for Insertional Mutant Analysis in Complex Host-Microbe Interactions}, + journal = {Current Protocols in Plant Biology}, + volume = {4}, + year = "2019", + month = "July", + pages = {e20097}, + doi = {10.1002/cppb.20097}, +} + + + + + +@ARTICLE{alliez19, + author = {{Alliez}, Pierre and {Di Cosmo}, Roberto and {Guedj}, Benjamin and + {Girault}, Alain and {Hacid}, Mohand-Said and {Legrand}, Arnaud and + {Rougier}, Nicolas P.}, + title = "{Attributing and Referencing (Research) Software: Best Practices and Outlook from Inria}", + journal = {Computing in Science \& Engineering}, + volume = {22}, + keywords = {Computer Science - Digital Libraries, Computer Science - Software Engineering}, + year = "2019", + month = "May", + pages = {39-52}, +archivePrefix = {arXiv}, + eprint = {1905.11123}, + primaryClass = {cs.DL}, + doi = {10.1109/MCSE.2019.2949413}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190511123A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{kneller19, + author = {Kneller,Gerald R.
and Hinsen,Konrad}, + title = {Memory effects in a random walk description of protein structure ensembles}, + journal = {The Journal of Chemical Physics}, + volume = {150}, + year = {2019}, + pages = {064911}, + doi = {10.1063/1.5054887}, +} + + + + + +@article{tange18, + author = {Tange, Ole}, + title = {GNU Parallel 2018}, + Journal = {Zenodo}, + volume = {1146014}, + pages = {\href{https://doi.org/10.5281/zenodo.1146014}{DOI:10.5281/zenodo.1146014}}, + year = 2018, + ISBN = {9781387509881}, + doi = {10.5281/zenodo.1146014}, + url = {https://doi.org/10.5281/zenodo.1146014} +} + + + + + +@ARTICLE{rule18, + author = {Adam Rule and Aur\'elien Tabard and {James D.} Hollan}, + title = {Exploration and Explanation in Computational Notebooks}, + journal = {Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems}, + volume = {1}, + year = {2018}, + pages = {30}, + doi = {10.1145/3173574.3173606}, +} + + + + + +@ARTICLE{plesser18, + author = {Hans E. Plesser}, + title = {Reproducibility vs. Replicability: A Brief History of a Confused Terminology}, + journal = {Frontiers in Neuroinformatics}, + volume = {11}, + year = {2018}, + pages = {76}, + doi = {10.3389/fninf.2017.00076}, +} + + + + + +@ARTICLE{zhang18, + author = {{Zhang}, Zhi-Yu and {Romano}, D. and {Ivison}, R.~J. and + {Papadopoulos}, Padelis P. 
and {Matteucci}, F.}, + title = "{Stellar populations dominated by massive stars in dusty starburst galaxies across cosmic time}", + journal = {Nature}, + keywords = {Astrophysics - Astrophysics of Galaxies}, + year = "2018", + month = "Jun", + volume = {558}, + number = {7709}, + pages = {260}, + doi = {10.1038/s41586-018-0196-x}, +archivePrefix = {arXiv}, + eprint = {1806.01280}, + primaryClass = {astro-ph.GA}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2018Natur.558..260Z}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{smart18, + author = {{Smart}, A.G.}, + title = {The war over supercooled water}, + journal = {Physics Today}, + volume = {Aug}, + year = "2018", + pages = {DOI:10.1063/PT.6.1.20180822a}, + doi = {10.1063/PT.6.1.20180822a}, +} + + + + + +@ARTICLE{kaiser18, + author = {{Kaiser}, J.}, + title = {Plan to replicate 50 high-impact cancer papers shrinks to just 18}, + journal = {Science}, + volume = {Jul}, + year = "2018", + pages = {31}, + doi = {10.1126/science.aau9619}, +} + + + + + +@ARTICLE{dicosmo18, + author = {{Di Cosmo}, Roberto and {Gruenpeter}, Morane and {Zacchiroli}, Stefano}, + title = {Identifiers for Digital Objects: The case of software source code preservation}, + journal = {Proceedings of iPRES 2018}, + year = "2018", + pages = {204.4}, + doi = {10.17605/osf.io/kde56}, +} + + + + + +@ARTICLE{gruning18, + author = {Gr\"uning, Bj\"orn and Chilton, John and K\"oster, Johannes and Dale, Ryan and Soranzo, Nicola and {van den Beek}, Marius and Goecks, Jeremy and Backofen, Rolf and Nekrutenko, Anton and Taylor, James}, + title = {Practical Computational Reproducibility in the Life Sciences}, + journal = {Cell Systems}, + volume = 6, + year = "2018", + pages = {631. bioRxiv:\href{https://www.biorxiv.org/content/10.1101/200683v2}{200683}}, + doi = {10.1016/j.cels.2018.03.014}, +} + + + + + +@ARTICLE{allen18, + author = {{Allen}, Alice and {Teuben}, Peter J. and {Ryan}, P. 
Wesley}, + title = "{Schroedinger's Code: A Preliminary Study on Research Source Code Availability and Link Persistence in Astrophysics}", + journal = {The Astrophysical Journal Supplement Series}, + keywords = {methods: numerical, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Digital Libraries}, + year = "2018", + month = "May", + volume = {236}, + number = {1}, + eid = {10}, + pages = {10}, + doi = {10.3847/1538-4365/aab764}, +archivePrefix = {arXiv}, + eprint = {1801.02094}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2018ApJS..236...10A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{burrell18, + author = {{Burrell}, A.G. and {Halford}, A. and {Klenzing}, J. and {Stoneback}, R.A. and {Morley}, S.K. and {Annex}, A.M. and {Laundal}, K.M. and {Kellerman}, A.C. and {Stansby}, D. and {Ma}, J.}, + title = {Snakes on a Spaceship—An Overview of Python in Heliophysics}, + journal = {Journal of Geophysical Research: Space Physics}, + volume = {123}, + year = "2018", + pages = {384}, + doi = {10.1029/2018JA025877}, +} + + + + + +@article{stodden18, + author = {{Stodden}, V. and {Seiler}, J. 
and {Ma}, Z.}, + title = {An empirical analysis of journal policy effectiveness for computational reproducibility}, + volume = {115}, + number = {11}, + pages = {2584}, + year = {2018}, + doi = {10.1073/pnas.1708290115}, + issn = {0027-8424}, + URL = {https://www.pnas.org/content/115/11/2584}, + journal = {Proceedings of the National Academy of Sciences} +} + + + + + +@article {fanelli18, + author = {{Fanelli}, D.}, + title = {Opinion: Is science really facing a reproducibility crisis, and do we need it to?}, + volume = {115}, + number = {11}, + pages = {2628}, + year = {2018}, + doi = {10.1073/pnas.1708272114}, + publisher = {National Academy of Sciences}, + issn = {0027-8424}, + URL = {https://www.pnas.org/content/115/11/2628}, + journal = {Proceedings of the National Academy of Sciences} +} + + + + + + +@ARTICLE{lewis18, + author = {{Lewis}, L.M. and {Edwards}, M.C. and {Meyers}, Z.R. and {Conover Talbot}, C. and {Hao}, H. and {Blum}, D. }, + title = "{Replication Study: Transcriptional amplification in tumor cells with elevated c-Myc}", + journal = {eLife}, + volume = {7}, + year = "2018", + month = "January", + pages = {e30274}, + doi = {10.7554/eLife.30274}, +} + + + + + +@ARTICLE{akhlaghi18b, + author = {{Akhlaghi}, Mohammad and {Bacon}, Roland and {Inami}, Hanae}, + title = "{MUSE HUDF survey I \& II, Sections 7.3 \& 3.4: photometry for objects with no prior broad-band segmentation map}", + journal = {Zenodo}, + pages = {DOI:10.5281/zenodo.1164774}, + year = "2018", + month = "February", + doi = {10.5281/zenodo.1164774}, +} + + + + + +@ARTICLE{akhlaghi18a, + author = {{Akhlaghi}, Mohammad and {Bacon}, Roland}, + title = "{MUSE HUDF survey I, Section 4: data and reproduction pipeline for photometry and astrometry}", + journal = {Zenodo}, + pages = {DOI:10.5281/zenodo.1163746}, + year = "2018", + month = "January", + doi = {10.5281/zenodo.1163746}, +} + + + + + +@ARTICLE{leek17, + author = {Jeffrey T. Leek and Leah R. 
Jager}, + title = {Is Most Published Research Really False?}, + journal = {Annual Review of Statistics and Its Application}, + volume = {4}, + year = {2017}, + pages = {109}, + doi = {10.1146/annurev-statistics-060116-054104}, +} + + + + + +@ARTICLE{reich17, + author = {Michael Reich and Thorin Tabor and Ted Liefeld and Helga Thorvaldsdóttir and Barbara Hill and Pablo Tamayo and Jill P. Mesirov}, + title = {The GenePattern Notebook Environment}, + journal = {Cell Systems}, + year = {2017}, + volume = {5}, + pages = {149}, + doi = {10.1016/j.cels.2017.07.003}, +} + + + + + +@ARTICLE{becker17, + author = {Gabriel Becker and Cory Barr and Robert Gentleman and Michael Lawrence}, + title = {Enhancing Reproducibility and Collaboration via Management of R Package Cohorts}, + journal = {Journal of Statistical Software, Articles}, + volume = {82}, + pages = 1, + year = "2017", +archivePrefix = {arXiv}, + eprint = {1501.02284}, + doi = {10.18637/jss.v082.i01}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150102284B}, +} + + + + + +@ARTICLE{jenness17, + author = {{Jenness}, Tim}, + title = "{Modern Python at the Large Synoptic Survey Telescope}", + journal = {ADASS 27}, + year = "2017", + month = "Dec", + eid = {arXiv:1712.00461}, + pages = {arXiv:1712.00461}, +archivePrefix = {arXiv}, + eprint = {1712.00461}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2017arXiv171200461J}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@article{bezanson17, + title={Julia: A fresh approach to numerical computing}, + author={Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B}, + journal={SIAM {R}eview}, + volume={59}, + number={1}, + pages={65}, + year={2017}, + archivePrefix={arXiv}, + eprint={1411.1607}, + publisher={SIAM}, + doi={10.1137/141000671}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2014arXiv1411.1607B}, +} + + + + + +@ARTICLE{melson17, + author = {{Melsen}, L.A. 
and {Torfs}, P.J.J.F and {Uijlenhoet}, R. and {Teuling}, A.J.}, + title = {Comment on “Most computational hydrology is not reproducible, so is it really science?” by Christopher Hutton et al.}, + journal = {Water Resources Research}, + volume = 53, + pages = {2568}, + year = {2017}, + doi = {10.1002/2016WR020208}, +} + + + + + +@ARTICLE{munafo17, + author = {{Munaf\'o}, M.R. and {Nosek}, B.A. and {Bishop}, D.V.M. and {Button}, K.S. and {Chambers}, C.D. and {Percie du Sert}, N. and {Simonsohn}, U. and {Wagenmakers}, E.J. and {Ware}, J.J. and {Ioannidis}, J.P.A.}, + title = {A manifesto for reproducible science}, + journal = {Nature Human Behaviour}, + volume = 1, + pages = {21}, + year = {2017}, + doi = {10.1038/s41562-016-0021}, +} + + + + + +@ARTICLE{jimenez17, + title={The popper convention: Making reproducible systems evaluation practical}, + author = {{Jimenez}, I. and {Sevilla}, M. and {Watkins}, N. and {Maltzahn}, C. and {Lofstead}, J. and {Mohror}, K. and {Arpaci-Dusseau}, A. and {Arpaci-Dusseau}, R.}, + journal = {IEEE IPDPSW}, + pages = {1561}, + year = {2017}, + doi = {10.1109/IPDPSW.2017.157}, +} + + + + + +@ARTICLE{bacon17, + author = {{Bacon}, Roland and {Conseil}, Simon and {Mary}, David and + {Brinchmann}, Jarle and {Shepherd}, Martin and {Akhlaghi}, Mohammad and + {Weilbacher}, Peter M. and {Piqueras}, Laure and {Wisotzki}, Lutz and + {Lagattuta}, David and {Epinat}, Benoit and {Guerou}, Adrien and + {Inami}, Hanae and {Cantalupo}, Sebastiano and + {Courbot}, Jean Baptiste and {Contini}, Thierry and {Richard}, Johan and + {Maseda}, Michael and {Bouwens}, Rychard and {Bouch{\'e}}, Nicolas and + {Kollatschny}, Wolfram and {Schaye}, Joop and {Marino}, Raffaella Anna and + {Pello}, Roser and {Herenz}, Christian and {Guiderdoni}, Bruno and + {Carollo}, Marcella}, + title = "{The MUSE Hubble Ultra Deep Field Survey. I.
Survey description, data reduction, and source detection}", + journal = {Astronomy \& Astrophysics}, + keywords = {galaxies: distances and redshifts, galaxies: high-redshift, cosmology: observations, methods: data analysis, techniques: imaging spectroscopy, galaxies: formation, Astrophysics - Astrophysics of Galaxies}, + year = "2017", + month = "Nov", + volume = {608}, + eid = {A1}, + pages = {A1}, + doi = {10.1051/0004-6361/201730833}, +archivePrefix = {arXiv}, + eprint = {1710.03002}, + primaryClass = {astro-ph.GA}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2017A\&A...608A...1B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{austin17, + author = {{Claire C.} Austin and Theodora Bloom and Sünje Dallmeier-Tiessen and {Varsha K.} Khodiyar and Fiona Murphy and Amy Nurnberger and Lisa Raymond and Martina Stockhause and Jonathan Tedds and Mary Vardigan and Angus Whyte}, + title = {Key components of data publishing: using current best practices to develop a reference model for data publishing}, + journal = {International Journal on Digital Libraries}, + volume = {18}, + year = {2017}, + pages = {77-92}, + doi = {10.1007/s00799-016-0178-2}, +} + + + + + +@ARTICLE{smith16, + author = {Arfon M. Smith and Daniel S. Katz and Kyle E. 
Niemeyer}, + title = {Software citation principles}, + journal = {PeerJ Computer Science}, + volume = {2}, + year = {2016}, + pages = {e86}, + doi = {10.7717/peerj-cs.86}, +} + + + + + +@ARTICLE{ziemann16, + author = {Mark Ziemann and Yotam Eren and Assam El-Osta}, + title = {Gene name errors are widespread in the scientific literature}, + journal = {Genome Biology}, + volume = {17}, + year = {2016}, + pages = {177}, + doi = {10.1186/s13059-016-1044-7}, +} + + + + + +@ARTICLE{hinsen16, + author = {Konrad Hinsen}, + title = {Scientific notations for the digital era}, + journal = {The Self Journal of Science}, + year = {2016}, + pages = {1: arXiv:\href{https://arxiv.org/abs/1605.02960}{1605.02960}}, +} + + + + + +@ARTICLE{kluyver16, + author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing}, + title = "{Jupyter Notebooks – a publishing format for reproducible computational workflows}", + journal = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, + year = {2016}, + pages = {87}, + doi = {10.3233/978-1-61499-649-1-87}, +} + + + + + +@ARTICLE{baker16, + author = {{Baker}, M.}, + title = "{Is there a reproducibility crisis?}", + journal = {Nature}, + volume = {533}, + year = "2016", + month = "May", + pages = {452}, + doi = {10.1038/533452a}, +} + + + + + +@ARTICLE{wilkinson16, + author = { {Wilkinson}, M.D and {Dumontier}, M. and {Aalbersberg}, I.J. and {Appleton}, G. and {Axton}, M. and {Baak}, A. and {Blomberg}, N. and {Boiten}, J. and {da Silva Santos}, L.B and {Bourne}, P.E. and {Bouwman}, J. and {Brookes}, A.J. and {Clark}, T. and {Crosas}, M. and {Dillo}, I. and {Dumon}, O. and {Edmunds}, S. and {Evelo}, C. and {Finkers}, R. and {Gonzalez-Beltran}, A. and {Gray}, A.J.G. and {Groth}, P. and {Goble}, C. 
and {Grethe}, Jeffrey S. and {Heringa}, J. and {’t Hoen}, P.A.C and {Hooft}, R. and {Kuhn}, T. and {Kok}, R. and {Kok}, J. and {Lusher}, S. and {Martone}, M. and {Mons}, A. and {Packer}, A. and {Persson}, B. and {Rocca-Serra}, P. and {Roos}, M. and {van Schaik}, R. and {Sansone}, S. and {Schultes}, E. and {Sengstag}, T. and {Slater}, T. and {Strawn}, G. and {Swertz}, M. and {Thompson}, M. and {van der Lei}, J. and {van Mulligen}, E. and {Velterop}, J. and {Waagmeester}, A. and {Wittenburg}, P. and {Wolstencroft}, K. and {Zhao}, J. and {Mons}, B.}, + title = "{The FAIR Guiding Principles for scientific data management and stewardship}", + journal = {Scientific Data}, + year = 2016, + month = mar, + volume = 3, + pages = {160018}, + doi = {10.1038/sdata.2016.18}, +} + + + + +@ARTICLE{hutton16, + author = {{Hutton}, C. and {Wagener}, T. and {Freer}, J. and {Han}, D. and {Duffy}, C. and {Arheimer}, B.}, + title = {Most computational hydrology is not reproducible, so is it really science?}, + journal = {Water Resources Research}, + year = {2016}, + volume = 52, + pages = {7548}, + doi = {10.1002/2016WR019285}, +} + + + + + +@ARTICLE{topalidou16, + author = {{Topalidou}, M. and {Leblois}, A. and {Boraud}, T. and {Rougier}, N.P.}, + title = {A long journey into reproducible computational neuroscience}, + journal = {Frontiers in Computational Neuroscience}, + year = {2016}, + volume = 9, + pages = {30}, + doi = {10.3389/fncom.2015.00030}, +} + + + + + +@ARTICLE{gil16, + author = {{Gil}, Yolanda and {David}, C.H. and {Demir}, I. and {Essawy}, B.T. and {Fulweiler}, R.W. and {Goodall}, J.L. and {Karlstrom}, L. and {Lee}, H. and {Mills}, H.J. and {Oh}, J. and {Pierce}, S.A. and {Pope}, A. and {Tzeng}, M.W. and {Villamizar}, S.R. 
and {Yu}, X}, + title = {Toward the Geoscience Paper of the Future: Best practices for documenting and sharing research from data to software to provenance}, + journal = {Earth and Space Science}, + year = 2016, + volume = 3, + pages = {388}, + doi = {10.1002/2015EA000136}, +} + + + + + +@ARTICLE{romine15, + author = {Charles H. Romine}, + title = {Secure Hash Standard (SHS)}, + journal = {Federal Information processing standards publication}, + volume = {180}, + pages = {4}, + year = {2015}, + doi = {10.6028/NIST.FIPS.180-4}, +} + + + + + +@ARTICLE{horvath15, + author = {Steve Horvath}, + title = {Erratum to: DNA methylation age of human tissues and cell types}, + journal = {Genome Biology}, + volume = {16}, + pages = {96}, + year = {2015}, + doi = {10.1186/s13059-015-0649-6}, +} + + + + + +@ARTICLE{chang15, + author = {Andrew C. Chang and Phillip Li}, + title = {Is Economics Research Replicable? Sixty Published Papers from Thirteen Journals Say ``Usually Not''}, + journal = {Finance and Economics Discussion Series 2015-083}, + year = {2015}, + pages = {1}, + doi = {10.17016/FEDS.2015.083}, +} + + + + + +@ARTICLE{schaffer15, + author = {Jonathan Schaffer}, + title = {What Not to Multiply Without Necessity}, + journal = {Australasian Journal of Philosophy}, + volume = {93}, + pages = {644}, + year = {2015}, + doi = {10.1080/00048402.2014.992447}, +} + + + + + +@ARTICLE{clarkso15, + author = "Chris Clarkson and Mike Smith and Ben Marwick and Richard Fullagar and Lynley A. Wallis and Patrick Faulkner and Tiina Manne and Elspeth Hayes and Richard G. Roberts and Zenobia Jacobs and Xavier Carah and Kelsey M. Lowe and Jacqueline Matthews and S. 
Anna Florin", + title = {The archaeology, chronology and stratigraphy of Madjedbebe (Malakunanja II): A site in northern Australia with early occupation}, + journal = {Journal of Human Evolution}, + year = 2015, + volume = 83, + pages = 46, + doi = {10.1016/j.jhevol.2015.03.014}, +} + + + + + +@ARTICLE{meng15, + author = {Haiyan Meng and Rupa Kommineni and Quan Pham and Robert Gardner and Tanu Malik and Douglas Thain}, + title = {An invariant framework for conducting reproducible computational science}, + journal = {Journal of Computational Science}, + year = 2015, + volume = 9, + pages = 137, + doi = {10.1016/j.jocs.2015.04.012}, +} + + + + + +@ARTICLE{gamblin15, + author = {Gamblin, Todd and LeGendre, Matthew and Collette, Michael R. and Lee, Gregory L. and Moody, Adam and {de Supinski}, Bronis R. and Futral, Scott}, + title = {The Spack package manager: bringing order to HPC software chaos}, + journal = {IEEE SC15}, + year = 2015, + volume = 1, + pages = {1}, + doi = {10.1145/2807591.2807623}, +} + + + + +@ARTICLE{akhlaghi15, + author = {{Akhlaghi}, M. 
and {Ichikawa}, T.}, + title = "{Noise-based Detection and Segmentation of Nebulous Objects}", + journal = {The Astrophysical Journal Supplement Series}, + archivePrefix = "arXiv", + eprint = {1505.01664}, + primaryClass = "astro-ph.IM", + keywords = {galaxies: irregular, galaxies: photometry, galaxies: structure, methods: data analysis, techniques: image processing, techniques: photometric}, + year = 2015, + month = sep, + volume = 220, + eid = {1}, + pages = {1-33}, + doi = {10.1088/0067-0049/220/1/1}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015ApJS..220....1A}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{courtes15, + author = {{Court{\'e}s}, Ludovic and {Wurmus}, Ricardo}, + title = {Reproducible and User-Controlled Software Environments in HPC with Guix}, + journal = {Euro-Par}, + volume = {9523}, + keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Operating Systems, Computer Science - Software Engineering}, + year = {2015}, + month = {Jun}, + eid = {arXiv:1506.02822}, + pages = {arXiv:1506.02822}, +archivePrefix = {arXiv}, + eprint = {1506.02822}, + primaryClass = {cs.DC}, + doi = {10.1007/978-3-319-27308-2_47}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150602822C}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{hinsen15, + author = {{Hinsen}, K.}, + title = {ActivePapers: a platform for publishing and archiving computer-aided research}, + journal = {F1000Research}, + year = 2015, + volume = 3, + pages = {289}, + doi = {10.12688/f1000research.5773.3}, +} + + + + + +@ARTICLE{belhajjame15, + author = {{Belhajjame}, K. and {Zhao}, Z. and {Garijo}, D. and {Gamble}, M. and {Hettne}, K. and {Palma}, R. and {Mina}, E. and {Corcho}, O. and {Gómez-Pérez}, J.M. and {Bechhofer}, S. and {Klyne}, G. 
and {Goble}, C}, + title = "{Using a suite of ontologies for preserving workflow-centric research objects}", + journal = {Journal of Web Semantics}, + year = 2015, + volume = 32, + pages = {16}, + doi = {10.1016/j.websem.2015.01.003}, +} + + + + + +@ARTICLE{bechhofer13, + author = {{Bechhofer}, S. and {Buchan}, I. and {De Roure}, D. and {Missier}, P. and {Ainsworth}, J. and {Bhagat}, J. and Couch, P. and Cruickshank, D. and {Delderfield}, M and Dunlop, I. and {Gamble}, M. and {Michaelides}, D. and {Owen}, S. and {Newman}, D. and {Sufi}, S. and {Goble}, C}, + title = "{Why linked data is not enough for scientists}", + journal = {Future Generation Computer Systems}, + year = 2013, + volume = 29, + pages = {599}, + doi = {10.1016/j.future.2011.08.004}, +} + + + + + +@ARTICLE{peng15, + author = {{Peng}, R.D.}, + title = {The reproducibility crisis in science: A statistical counterattack}, + journal = {Significance}, + year = 2015, + month = jun, + volume = 12, + pages = {30}, + doi = {10.1111/j.1740-9713.2015.00827.x}, +} + + + + + +@ARTICLE{katz14, + author = {Daniel S. Katz}, + title = {Transitive Credit as a Means to Address Social and Technological Concerns Stemming from Citation and Attribution of Digital Products}, + journal = {Journal of Open Research Software}, + year = {2014}, + volume = {2}, + pages = {e20}, + doi = {10.5334/jors.be}, +} + + + + + +@ARTICLE{herndon14, + author = {Thomas Herndon and Michael Ash and Robert Pollin}, + title = {Does high public debt consistently stifle economic growth? 
A critique of Reinhart and Rogoff}, + journal = {Cambridge Journal of Economics}, + year = {2014}, + month = {dec}, + volume = {38}, + pages = {257}, + doi = {10.1093/cje/bet075}, +} + + + + + +@ARTICLE{easterbrook14, + author = {{Easterbrook}, S.}, + title = {Open code for open science?}, + journal = {Nature Geoscience}, + year = 2014, + month = oct, + volume = 7, + pages = {779}, + doi = {10.1038/ngeo2283}, +} + + + + + +@ARTICLE{fomel13, + author = {Sergey Fomel and Paul Sava and Ioan Vlad and Yang Liu and Vladimir Bashkardin}, + title = {Madagascar: open-source software project for multidimensional data analysis and reproducible computational experiments}, + journal = {Journal of Open Research Software}, + year = {2013}, + volume = {1}, + pages = {e8}, + doi = {10.5334/jors.ag}, +} + + + + + +@ARTICLE{sandve13, + author = {{Sandve}, G.K. and {Nekrutenko}, A. and {Taylor}, J. and {Hovig}, E.}, + title = {Ten Simple Rules for Reproducible Computational Research}, + journal = {PLoS Computational Biology}, + year = 2013, + month = oct, + volume = 9, + pages = {e1003285}, + doi = {10.1371/journal.pcbi.1003285}, +} + + + + + +@ARTICLE{malik13, + author = {Tanu Malik and Quan Pham and Ian Foster}, + title = {SOLE: Towards Descriptive and Interactive Publications}, + journal = {Implementing Reproducible Research}, + year = 2013, + volume = {Chapter 2}, + pages = {1. URL: \url{https://osf.io/ns2m3}}, +} + + + + + +@ARTICLE{gronenschild12, + author = {Ed H. B. M. Gronenschild and Petra Habets and Heidi I. L.
Jacobs and Ron Mengelers and Nico Rozendaal and Jim van Os and Machteld Marcelis}, + title = {The Effects of FreeSurfer Version, Workstation Type, and Macintosh Operating System Version on Anatomical Volume and Cortical Thickness Measurements}, + journal = {PLoS ONE}, + volume = {7}, + year = {2012}, + pages = {e38234}, + doi = {10.1371/journal.pone.0038234}, +} + + + + + +@ARTICLE{pham12, + author = {Quan Pham and Tanu Malik and Ian Foster and Roberto {Di Lauro} and Raffaele Montella}, + title = {SOLE: Linking Research Papers with Science Objects}, + journal = {Provenance and Annotation of Data and Processes (IPAW)}, + year = {2012}, + pages = {203}, + doi = {10.1007/978-3-642-34222-6_16}, +} + + + + + +@ARTICLE{davison12, + author = {Andrew Davison}, + title = {Automated Capture of Experiment Context for Easier Reproducibility in Computational Research}, + journal = {Computing in Science \& Engineering}, + volume = {14}, + year = {2012}, + pages = {48}, + doi = {10.1109/MCSE.2012.41}, +} + + + + + +@ARTICLE{zhao12, + author = {Jun Zhao and Jose Manuel Gomez-Perez and Khalid Belhajjame and Graham Klyne and Esteban Garcia-Cuesta and Aleix Garrido and Kristina Hettne and Marco Roos and David {De Roure} and Carole Goble}, + title = {Why workflows break — Understanding and combating decay in Taverna workflows}, + journal = {IEEE 8th International Conference on E-Science}, + year = {2012}, + pages = {1}, + doi = {10.1109/eScience.2012.6404482}, +} + + + + +@ARTICLE{vangorp11, + author = {Pieter {Van Gorp} and Steffen Mazanek}, + title = {SHARE: a web portal for creating and sharing executable research}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {589}, + doi = {10.1016/j.procs.2011.04.062}, +} + + + + + +@ARTICLE{hinsen11, + author = {{Hinsen}, Konrad}, + title = {A data and code model for reproducible research and executable papers}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {579}, + doi = 
{10.1016/j.procs.2011.04.061}, +} + + + + + +@ARTICLE{limare11, + author = {Nicolas Limare and Jean-Michel Morel}, + title = {The IPOL Initiative: Publishing and Testing Algorithms on Line for +Reproducible Research in Image Processing}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {716}, + doi = {10.1016/j.procs.2011.04.075}, +} + + + + + +@ARTICLE{gavish11, + author = {Matan Gavish and David L. Donoho}, + title = {A Universal Identifier for Computational Results}, + journal = {Procedia Computer Science}, + year = 2011, + volume = 4, + pages = {637}, + doi = {10.1016/j.procs.2011.04.067}, +} + + + + +@ARTICLE{gabriel11, + author = {Ann Gabriel and Rebecca Capone}, + title = {Executable Paper Grand Challenge Workshop}, + journal = {Procedia Computer Science}, + volume = {4}, + year = {2011}, + pages = {577}, + doi = {10.1016/j.procs.2011.04.060}, +} + + + + + +@ARTICLE{nowakowski11, + author = {Piotr Nowakowski and Eryk Ciepiela and Daniel Har\k{e}\.{z}lak and Joanna Kocot and Marek Kasztelnik and Tomasz Barty\'nski and Jan Meizner and Grzegorz Dyk and Maciej Malawski}, + title = {The Collage Authoring Environment}, + journal = {Procedia Computer Science}, + volume = {4}, + year = {2011}, + pages = {608}, + doi = {10.1016/j.procs.2011.04.064}, +} + + + + + +@ARTICLE{peng11, + author = {{Peng}, R.D.}, + title = {Reproducible Research in Computational Science}, + journal = {Science}, + year = {2011}, + month = dec, + volume = 334, + pages = {1226}, + doi = {10.1126/science.1213847}, +} + + + + + +@ARTICLE{gil10, + author = {Yolanda Gil and Pedro A.
González-Calero and Jihie Kim and Joshua Moody and Varun Ratnakar}, + title = {A semantic framework for automatic generation of computational workflows using distributed data and component catalogues}, + journal = {Journal of Experimental \& Theoretical Artificial Intelligence}, + year = {2010}, + volume = {23}, + pages = {389}, + doi = {10.1080/0952813X.2010.490962}, +} + + + + + +@ARTICLE{pence10, + author = {{Pence}, W.~D. and {Chiappetti}, L. and {Page}, C.~G. and {Shaw}, R.~A. and + {Stobie}, E.}, + title = "{Definition of the Flexible Image Transport System (FITS), version 3.0}", + journal = {Astronomy and Astrophysics}, + keywords = {instrumentation: miscellaneous, methods: miscellaneous, techniques: miscellaneous, reference systems, standards, astronomical databases: miscellaneous}, + year = "2010", + month = "Dec", + volume = {524}, + eid = {A42}, + pages = {A42}, + doi = {10.1051/0004-6361/201015362}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2010A\&A...524A..42P}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + + +@ARTICLE{goecks10, + author = {Jeremy Goecks and Anton Nekrutenko and James Taylor}, + title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences}, + journal = {Genome Biology}, + year = {2010}, + volume = {11}, + pages = {R86}, + doi = {10.1186/gb-2010-11-8-r86}, +} + + + + + +@ARTICLE{merali10, + author = {Zeeya Merali}, + title = {Computational science: ...Error}, + journal = {Nature}, + year = 2010, + volume = 467, + pages = {775}, + doi = {10.1038/467775a}, +} + + + + + +@ARTICLE{casadevall10, + author = {{Casadevall}, A. 
and {Fang}, F.C}, + title = {Reproducible Science}, + journal = {Infection and Immunity}, + year = 2010, + volume = 78, + pages = {4972}, + doi = {10.1128/IAI.00908-10}, +} + + + + + +@ARTICLE{mesirov10, + author = {{Mesirov}, J.P.}, + title = {Accessible Reproducible Research}, + journal = {Science}, + year = 2010, + volume = 327, + pages = {415}, + doi = {10.1126/science.1179653}, +} + + + + + +@ARTICLE{cheney09, + author = {James Cheney and Laura Chiticariu and Wang-Chiew Tan}, + title = {Provenance in Databases: Why, How, and Where}, + journal = {Foundations and Trends in Databases}, + year = {2009}, + volume = {1}, + pages = {379}, + doi = {10.1561/1900000006}, +} + + + + + +@ARTICLE{ioannidis2009, + author = {John P. A. Ioannidis and David B. Allison and Catherine A. Ball and Issa Coulibaly and Xiangqin Cui and Aedín C Culhane and Mario Falchi and Cesare Furlanello and Laurence Game and Giuseppe Jurman and Jon Mangion and Tapan Mehta and Michael Nitzberg and Grier P. Page and Enrico Petretto and Vera {van Noort}}, + title = {Repeatability of published microarray gene expression analyses}, + journal = {Nature Genetics}, + year = {2009}, + volume = {41}, + pages = {149}, + doi = {10.1038/ng.295}, +} + + + + + +@ARTICLE{fomel09, + author = {Sergey Fomel and Jon F. Claerbout}, + title = {Reproducible Research}, + journal = {Computing in Science \& Engineering}, + year = {2009}, + volume = {11}, + pages = {5}, + doi = {10.1109/MCSE.2009.14}, +} + + + + + +@ARTICLE{baggerly09, + author = {Keith A.
Baggerly and Kevin R Coombes}, + title = {Deriving chemosensitivity from cell lines: Forensic bioinformatics and reproducible research in high-throughput biology}, + journal = {The Annals of Applied Statistics}, + year = {2009}, + volume = {3}, + pages = {1309}, + doi = {10.1214/09-AOAS291}, +} + + + + + +@ARTICLE{scheidegger08, + author = {Carlos Scheidegger and David Koop and Emanuele Santos and Huy Vo and Steven Callahan and Juliana Freire and Cláudio Silva}, + title = {Tackling the Provenance Challenge one layer at a time}, + journal = {Concurrency and Computation: Practice and Experience}, + year = {2008}, + volume = {20}, + pages = {473}, + doi = {10.1002/cpe.1237}, +} + + + + + +@ARTICLE{moreau08, + author = {Moreau, Luc and Ludäscher, Bertram and Altintas, Ilkay and Barga, Roger S. and Bowers, Shawn and Callahan, Steven and Chin JR., George and Clifford, Ben and Cohen, Shirley and Cohen-Boulakia, Sarah and Davidson, Susan and Deelman, Ewa and Digiampietri, Luciano and Foster, Ian and Freire, Juliana and Frew, James and Futrelle, Joe and Gibson, Tara and Gil, Yolanda and Goble, Carole and Golbeck, Jennifer and Groth, Paul and Holland, David A. and Jiang, Sheng and Kim, Jihie and Koop, David and Krenek, Ales and McPhillips, Timothy and Mehta, Gaurang and Miles, Simon and Metzger, Dominic and Munroe, Steve and Myers, Jim and Plale, Beth and Podhorszki, Norbert and Ratnakar, Varun and Santos, Emanuele and Scheidegger, Carlos and Schuchardt, Karen and Seltzer, Margo and Simmhan, Yogesh L. and Silva, Claudio and Slaughter, Peter and Stephan, Eric and Stevens, Robert and Turi, Daniele and Vo, Huy and Wilde, Mike and Zhao, Jun and Zhao, Yong}, + title = {The First Provenance Challenge}, + journal = {Concurrency and Computation: Practice and Experience}, + year = {2008}, + volume = {20}, + pages = {409}, + doi = {10.1002/cpe.1233}, +} + + + + + +@Article{matplotlib2007, + Author = {Hunter, J.
D.}, + Title = {Matplotlib: A 2D graphics environment}, + Journal = {CiSE}, + Volume = {9}, + Number = {3}, + Pages = {90}, + abstract = {Matplotlib is a 2D graphics package used for Python + for application development, interactive scripting, and + publication-quality image generation across user + interfaces and operating systems.}, + publisher = {IEEE COMPUTER SOC}, + doi = {10.1109/MCSE.2007.55}, + year = 2007 +} + + + + + +@ARTICLE{witten2007, + author = {Ben Witten and Bill Curry and Jeff Shragge}, + title = {A New Build Environment for SEP}, + journal = {Stanford Exploration Project}, + year = {2007}, + volume = {129}, + pages = {247: \url{http://sepwww.stanford.edu/data/media/public/docs/sep129/ben1.pdf}}, +} + + + + + +@ARTICLE{miller06, + author = {Greg Miller}, + title = {A Scientist's Nightmare: Software Problem Leads to Five Retractions}, + journal = {Science}, + year = {2006}, + volume = {314}, + pages = {1856}, + doi = {10.1126/science.314.5807.1856}, +} + + + + + +@ARTICLE{reich06, + author = {Michael Reich and Ted Liefeld and Joshua Gould and Jim Lerner and Pablo Tamayo and Jill P Mesirov}, + title = {GenePattern 2.0}, + journal = {Nature Genetics}, + year = {2006}, + volume = {38}, + pages = {500}, + doi = {10.1038/ng0506-500}, +} + + + + + +@ARTICLE{ludascher05, + author = {Ludäs\-cher, Bertram and Altintas, Ilkay and Berkley, Chad and Higgins, Dan and Jaeger, Efrat and Jones, Matthew and Lee, Edward A. and Tao, Jing and Zhao, Yang}, + title = {Scientific workflow management and the Kepler system}, + journal = {Concurrency and Computation: Practice and Experience}, + year = {2006}, + volume = {18}, + pages = {1039}, + doi = {10.1002/cpe.994}, +} + + + + + +@ARTICLE{ioannidis05, + author = {John P. A.
Ioannidis}, + title = {Why Most Published Research Findings Are False}, + journal = {PLoS Medicine }, + year = {2005}, + volume = {2}, + pages = {e124}, + doi = {10.1371/journal.pmed.0020124}, +} + + + + + +@ARTICLE{bavoil05, + author = {Louis Bavoil and Steven P. Callahan and Patricia J. Crossno and Juliana Freire and Carlos E. Scheidegger and Cláudio T. Silva and Huy T. Vo}, + title = {VisTrails: Enabling Interactive Multiple-View Visualizations}, + journal = {VIS 05. IEEE Visualization}, + year = {2005}, + volume = {}, + pages = {135}, + doi = {10.1109/VISUAL.2005.1532788}, +} + + + + + +@ARTICLE{dolstra04, + author = {{Dolstra}, Eelco and {de Jonge}, Merijn and {Visser}, Eelco}, + title = {Nix: A Safe and Policy-Free System for Software Deployment}, + journal = {Large Installation System Administration Conference}, + year = {2004}, + volume = {18}, + pages = {79. \url{https://www.usenix.org/legacy/events/lisa04/tech/full_papers/dolstra/dolstra.pdf}}, +} + + + + + +@ARTICLE{oinn04, + author = {Oinn, Tom and Addis, Matthew and Ferris, Justin and Marvin, Darren and Senger, Martin and Greenwood, Mark and Carver, Tim and Glover, Kevin and Pocock, Matthew R. and Wipat, Anil and Li, Peter}, + title = {Taverna: a tool for the composition and enactment of bioinformatics workflows}, + journal = {Bioinformatics}, + year = {2004}, + volume = {20}, + pages = {3045}, + doi = {10.1093/bioinformatics/bth361}, +} + + + + + +@ARTICLE{schwab2000, + author = {Matthias Schwab and Martin Karrenbach and Jon F. Claerbout}, + title = {Making scientific computations reproducible}, + journal = {Computing in Science \& Engineering}, + year = {2000}, + volume = {2}, + pages = {61}, + doi = {10.1109/5992.881708}, +} + + + + + +@ARTICLE{buckheit1995, + author = {Jonathan B. Buckheit and David L. 
Donoho}, + title = {WaveLab and Reproducible Research}, + journal = {Wavelets and Statistics}, + year = {1995}, + volume = {1}, + pages = {55}, + doi = {10.1007/978-1-4612-2544-7\_5}, +} + + + + + +@ARTICLE{claerbout1992, + author = {Jon F. Claerbout and Martin Karrenbach}, + title = {Electronic documents give reproducible research a new meaning}, + journal = {SEG Technical Program Expanded Abstracts}, + year = {1992}, + volume = {1}, + pages = {601-604}, + doi = {10.1190/1.1822162}, +} + + + + + +@ARTICLE{eker03, + author = {Johan Eker and Jorn W Janneck and Edward A. Lee and Jie Liu and Xiaojun Liu and Jozsef Ludvig and Sonia Sachs and Yuhong Xiong and Stephen Neuendorffer}, + title = {Taming heterogeneity - the Ptolemy approach}, + journal = {Proceedings of the IEEE}, + year = {2003}, + volume = {91}, + pages = {127}, + doi = {10.1109/JPROC.2002.805829}, +} + + + + + +@ARTICLE{stevens03, + author = {Robert Stevens and Kevin Glover and Chris Greenhalgh and Claire Jennings and Simon Pearce and Peter Li and Melena Radenkovic and Anil Wipat}, + title = {Performing in silico Experiments on the Grid: A Users Perspective}, + journal = {Proceedings of UK e-Science All Hands Meeting}, + year = {2003}, + pages = {43}, +} + + + + + +@ARTICLE{knuth84, + author = {Donald Knuth}, + title = {Literate Programming}, + journal = {The Computer Journal}, + year = {1984}, + volume = {27}, + pages = {97}, + doi = {10.1093/comjnl/27.2.97}, +} + + + + + +@ARTICLE{stallman88, + author = {Richard M. Stallman and Roland McGrath and Paul D. Smith}, + title = {GNU Make: a program for directing recompilation}, + journal = {Free Software Foundation}, + year = {1988}, + pages = {ISBN:1-882114-83-3. 
\url{https://www.gnu.org/s/make/manual/make.pdf}}, +} + + + + + +@ARTICLE{somogyi87, + author = {Zoltan Somogyi}, + title = {Cake: a fifth generation version of make}, + journal = {University of Melbourne}, + year = {1987}, + pages = {1: \url{https://pdfs.semanticscholar.org/3e97/3b5c9af7763d70cdfaabdd1b96b3b75b5483.pdf}}, +} + + + + + +@ARTICLE{feldman79, + author = {Stuart I. Feldman}, + title = {Make -- a program for maintaining computer programs}, + journal = {Journal of Software: Practice and Experience}, + volume = {9}, + pages = {255}, + year = {1979}, + doi = {10.1002/spe.4380090402}, +} + + + + + +@ARTICLE{mcilroy78, + author = {M. D. McIlroy and E. N. Pinson and B. A. Tague}, + title = {UNIX Time-Sharing System: Foreword}, + journal = {\doihref{https://archive.org/details/bstj57-6-1899/mode/2up}{Bell System Technical Journal}}, + year = {1978}, + volume = {57}, + pages = {6, ark:/13960/t0gt6xf72}, + doi = {}, +} + + + + + +@ARTICLE{anscombe73, + author = {{Anscombe}, F.J.}, + title = {Graphs in Statistical Analysis}, + journal = {The American Statistician}, + year = {1973}, + volume = {27}, + pages = {17}, + doi = {10.1080/00031305.1973.10478966}, +} + + + + + +@ARTICLE{roberts69, + author = {{Roberts}, K.V.}, + title = {The publication of scientific fortran programs}, + journal = {Computer Physics Communications}, + year = {1969}, + volume = {1}, + pages = {1}, + doi = {10.1016/0010-4655(69)90011-3}, +} -- cgit v1.2.1