From a16f22881841e57f2652f2a17b7f60b5106b2e60 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Wed, 7 Feb 2018 20:37:15 +0100 Subject: First commit to the reproduction pipeline template Let's start working on this pipeline independently with this first commit. It is based on my previous experiences, but I had never made a skeleton of a pipeline before, it was always within a working analysis. But now that the pipeline has a separate repository for its self, we will be able to work on it and use it as a base for future work and modify it to make it even better. Hopefully in time (and with the help of others), it will grow and become much more robust and useful. --- reproduce/config/gnuastro/gnuastro.conf | 36 +++++++++ reproduce/config/pipeline/DIRECTORIES.mk.in | 50 ++++++++++++ reproduce/config/pipeline/filters.mk | 25 ++++++ reproduce/config/pipeline/pdf.mk | 14 ++++ reproduce/config/pipeline/web.mk | 6 ++ reproduce/src/make/download.mk | 68 +++++++++++++++++ reproduce/src/make/initialize.mk | 114 ++++++++++++++++++++++++++++ reproduce/src/make/paper.mk | 34 +++++++++ 8 files changed, 347 insertions(+) create mode 100644 reproduce/config/gnuastro/gnuastro.conf create mode 100644 reproduce/config/pipeline/DIRECTORIES.mk.in create mode 100644 reproduce/config/pipeline/filters.mk create mode 100644 reproduce/config/pipeline/pdf.mk create mode 100644 reproduce/config/pipeline/web.mk create mode 100644 reproduce/src/make/download.mk create mode 100644 reproduce/src/make/initialize.mk create mode 100644 reproduce/src/make/paper.mk (limited to 'reproduce') diff --git a/reproduce/config/gnuastro/gnuastro.conf b/reproduce/config/gnuastro/gnuastro.conf new file mode 100644 index 0000000..49f9906 --- /dev/null +++ b/reproduce/config/gnuastro/gnuastro.conf @@ -0,0 +1,36 @@ +# Default values for the common options to all the programs in GNU +# Astronomy Utilities.
+# +# IMPORTANT NOTE FOR THE REPRODUCTION PIPELINE: The `lastconfig' +# option is very important here, because we don't want any of +# Gnuastro's programs to go into an uncontrolled environment (user or +# system-wide configuration files). Uncomment the `onlyversion' option +# when the paper/pipeline is ready to be published and set the value +# of X.X accordingly. +# +# The rest of this configuration file in this template reproduction +# pipeline is taken from the default Gnuastro configuration from its +# source (`bin/gnuastro.conf'). + +# Reproduction pipeline + lastconfig 1 +# onlyversion X.X + +# Input: + hdu 1 + ignorecase 1 + searchin name + +# Tessellation + tilesize 50,50 + numchannels 1,1 + remainderfrac 0.1 + workoverch 0 + interpnumngb 9 + interponlyblank 0 + +# Output: + tableformat fits-binary + +# Operating mode + minmapsize 1000000000 \ No newline at end of file diff --git a/reproduce/config/pipeline/DIRECTORIES.mk.in b/reproduce/config/pipeline/DIRECTORIES.mk.in new file mode 100644 index 0000000..9ebd67b --- /dev/null +++ b/reproduce/config/pipeline/DIRECTORIES.mk.in @@ -0,0 +1,50 @@ +# Top-level user specific directories. Note the points below: +# +# - The VALUES to these directories are initially JUST PLACE-HOLDERS! +# Please correct them based on your system. +# +# - The directories don't need to necessarily exist. If they do not exist, +# they will be created and the necessary data will be downloaded into +# them. Of course, provided that you have write permissions and an internet +# connection. +# +# - Do not use the tilde expansion `~' or variables for your home +# directory. Please use the full address, for example +# `/home/your-user-name'. +# +# - An ending forward-slash `/' is NOT necessary. In the pipeline, all +# these variables will be followed by a `/', so if you put a `/' at the +# end of the value here, you will see a `//' in the printed outputs +# during the processing.
This causes no technical problem, but can make +# reading the outputs harder and is thus not recommended. + + + + + +# Input data directories +# ---------------------- +# +# This is where the input data (with the same file-name standard as the +# online webpage) are stored. If this directory doesn't exist, or it +# doesn't contain the files (with the correct file-name formats), it will +# be created and the images will be downloaded. See +# `reproduce/config/pipeline/web.mk', for the URLs containing the expected +# inputs for each survey. +SURVEY = reproduce/SURVEY + + + + + +# Build directory +# --------------- +# +# This is where the intermediate outputs of each step are kept. +# +# Why a separate build directory? So the source and configuration files for +# this reproduction pipeline do not get crowded by all the +# intermediate/derivative files. Also to make synchronization and backups +# easier: the contents of the build directory do not need to be backed +# up since they can be reproduced and they can be large. +BDIR = reproduce/BDIR diff --git a/reproduce/config/pipeline/filters.mk b/reproduce/config/pipeline/filters.mk new file mode 100644 index 0000000..6fa785d --- /dev/null +++ b/reproduce/config/pipeline/filters.mk @@ -0,0 +1,25 @@ +# `filters' are the possible different parts of the survey, for +# example filters in broad or narrow-band astronomical imaging +# datasets. Since a generic term for them (to apply to other types of +# surveys/datasets) hasn't been considered yet, we'll stick with the +# `filters' name. But feel free to correct it (or propose a +# suggestion). +# +# If your dataset only has a single filter, or this concept is not +# defined for your type of input dataset, you can ignore this +# variable. +# +# The values can be any string to identify different parts of a survey +# separated by white space characters (for example `f125w f160w' or `J +# H' if you want to specify two filters).
+# +# To be clean and also help in readability of the pipeline, it is good +# practice to define a separate `filters-XXXX' variable for each +# survey/dataset, even if they have overlapping filters. +# +# These `filters' are used in the initial downloading of the data and +# it is good practice (for avoiding bugs) to keep the same filter (and +# survey) names in the filenames of the intermediate/output files +# also. This will make sure that the raw input and intermediate/final +# output are exactly related. +filters-survey = a b c d e f g h i diff --git a/reproduce/config/pipeline/pdf.mk b/reproduce/config/pipeline/pdf.mk new file mode 100644 index 0000000..51ab933 --- /dev/null +++ b/reproduce/config/pipeline/pdf.mk @@ -0,0 +1,14 @@ +# Make the final PDF? +# ------------------- +# +# During the testing of a pipeline, it is usually not necessary to build +# the PDF file (which makes a lot of output lines on the command-line +# and can make it hard to find the commands and possible errors (and +# their outputs)). Also, in some cases, only the produced results may +# be of interest and not the final PDF, so LaTeX (and its necessary +# packages) may not be installed. +# +# If this variable is given any string, a PDF will be made with +# LaTeX. Otherwise, a notice will just be printed that for now, no PDF +# will be created. +pdf-compile = yes diff --git a/reproduce/config/pipeline/web.mk b/reproduce/config/pipeline/web.mk new file mode 100644 index 0000000..f80b886 --- /dev/null +++ b/reproduce/config/pipeline/web.mk @@ -0,0 +1,6 @@ +# Web server(s) hosting the input data for this pipeline. +# +# This is the web page containing the files that must be located in the +# `SURVEY' directory of `reproduce/config/pipeline/DIRECTORIES.mk' on the +# local system.
+web-survey = https://some.webpage.com/example/server diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk new file mode 100644 index 0000000..244bd04 --- /dev/null +++ b/reproduce/src/make/download.mk @@ -0,0 +1,68 @@ +# Download all the necessary inputs if they are not already present. +# +# Since most systems only have one input/connection into the network, +# downloading is essentially a serial (not parallel) operation, so the +# recipes in this Makefile all use a single file lock to have only one +# download running at any instant. +# +# Original author: +# Your name +# Contributing author(s): +# Copyright (C) YYYY, Your Name. +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + + +# Identify the downloader tool +# ---------------------------- +# +# If cURL is already present, that will be used, otherwise, we'll use Wget. +# Since their output-filename options differ, that option is also kept in the +# `downloader' variable: first give the output filename after `downloader', +# then the web address. cURL also gets `-f' (fail on HTTP errors instead of +# saving the server's error page as the data) and `-L' (follow redirects). +downloader := $(shell if type curl > /dev/null 2>&1; then downloader="curl -f -L -o"; \ + else downloader="wget -O"; \ + fi; echo "$$downloader"; ) + + + + + +# Download SURVEY data +# -------------------- +# +# Data from a survey (for example an imaging survey) usually have a special +# file-name format which should be set here in the `foreach' loop.
Note +# that `foreach' needs a backslash (`\') at the end of each broken line. Each +# file is downloaded to `$@.tmp' and renamed only on success (no partial targets). +all-survey = $(foreach f, $(filters-survey), \ + $(SURVEY)/a-special-format-$(f).fits \ + $(SURVEY)/a-possibly-additional-$(f)-format.fits ) +$(SURVEY):; mkdir -p $@ +$(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir) + flock $(lockdir)/download -c "$(downloader) $@.tmp $(web-survey)/$* && mv $@.tmp $@" + + + + + + +# Final TeX macro +# --------------- +# +# It is very important to mention the download address in the final report. +# `printf' is used since some shells' `echo' expands escapes like `\n'. +$(mtexdir)/download.tex: $(pconfdir)/web.mk | $(mtexdir) + @printf '\\newcommand{\\websurvey}{%s}\n' "$(web-survey)" > $@ diff --git a/reproduce/src/make/initialize.mk b/reproduce/src/make/initialize.mk new file mode 100644 index 0000000..2da5e79 --- /dev/null +++ b/reproduce/src/make/initialize.mk @@ -0,0 +1,114 @@ +# Initialize the reproduction pipeline. +# +# Original author: +# Your name +# Contributing author(s): +# Copyright (C) YYYY, Your Name. +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# A copy of the GNU General Public License is available at +# <http://www.gnu.org/licenses/>. + + + + + +# High-level directory definitions +# -------------------------------- +# +# Basic directories that are used throughout the whole pipeline. +# +# Locks are used to make sure that an operation is done in series not in +# parallel (even if Make is run in parallel with the `-j' option). The most +# common case is downloads which are better done in series and not in +# parallel.
Also, some programs may not be thread-safe, therefore it will +# be necessary to put a lock on them. This pipeline uses the `flock' +# program to achieve this. +texdir = $(BDIR)/tex +lockdir = $(BDIR)/locks +bdirsym = reproduce/build +mtexdir = $(texdir)/macros +pconfdir = reproduce/config/pipeline + + + + + +# Make the high-level directories +# ------------------------------- +# +# These are just the top-level directories for all the separate steps. The +# directories (or possible sub-directories) for individual steps will be +# defined and added within their own Makefiles. `mkdir -p' also creates missing parents of `BDIR'. +$(BDIR):; mkdir -p $@ +$(mtexdir): | $(texdir); mkdir -p $@ +$(texdir) $(lockdir): | $(BDIR); mkdir -p $@ + + + + + +# High-level Makefile management +# ------------------------------ +# +# About `.PHONY': these are targets that must be built even if a file with +# their name exists. Most don't correspond to a file, but those that do are +# included here to ensure that the file is always built in every run: for +# example the pipeline versions may change within two separate runs, so we +# want `$(mtexdir)/initialize.tex' (which holds them) rebuilt every time. +.PHONY: all clean clean-mmap $(texdir)/versions.tex $(mtexdir)/initialize.tex +clean-mmap:; rm -f reproduce/config/gnuastro/mmap* +clean: + rm -rf $(BDIR) $(bdirsym) *.pdf *.log *.out *.aux *.auxlock \ + reproduce/config/gnuastro/mmap* + + + + + +# Pipeline version +# ---------------- +# +# The pipeline's version is necessary for the analysis and must be +# calculated every time the pipeline is run, so even though this file +# actually exists, it is also added as a `.PHONY' target above.
+$(mtexdir)/initialize.tex: | $(mtexdir) +# Use `printf': the `echo' of some shells (e.g. dash) expands `\n' and `\b'. + @v=$$(git describe --dirty --always); \ + printf '\\newcommand{\\pipelineversion}{%s}\n' "$$v" > $@ + + @v=$$(astnoisechisel --version | awk 'NR==1{print $$NF}'); \ + printf '\\newcommand{\\gnuastroversion}{%s}\n' "$$v" >> $@ + + @printf '\\newcommand{\\bdir}{%s}\n' "$(BDIR)" >> $@ + + + + + +# Symbolic link to build directory +# -------------------------------- +# +# Besides $(BDIR), we are also making a symbolic link to it if $(bdirsym) +# is not empty. In case this symbolic link is not needed, simply remove its +# value from the definitions above. In that case, it will be read as a +# blank (non-existent). +# +# Note that $(BDIR) might not be an absolute path and this will complicate +# the symbolic link creation. To be generic, we'll first call `readlink' to +# make sure we have an absolute address, then we'll make a symbolic link to +# that. +ifneq ($(bdirsym),) +$(bdirsym): | $(BDIR) + absbdir=$$(readlink -f $(BDIR)); \ + ln -s "$$absbdir" $@ +endif diff --git a/reproduce/src/make/paper.mk b/reproduce/src/make/paper.mk new file mode 100644 index 0000000..0725ec8 --- /dev/null +++ b/reproduce/src/make/paper.mk @@ -0,0 +1,34 @@ +# Build the final PDF paper/report. +# +# Original author: +# Your name +# Contributing author(s): +# Copyright (C) YYYY, Your Name. +# +# This script is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This script is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# A copy of the GNU General Public License is available at +# <http://www.gnu.org/licenses/>. + + + + + +# The final paper +# --------------- +# +# The commands to build the final report.
We want the pipeline version to +# be checked every time the final PDF is to be built. +paper.pdf: tex/pipeline.tex paper.tex + + # Make the report. + @pdflatex -shell-escape -halt-on-error paper.tex + @rm -f *.auxlock *.aux *.out *.log -- cgit v1.2.1