aboutsummaryrefslogtreecommitdiff
path: root/reproduce/src/make/download.mk
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2018-11-25 15:22:48 +0000
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2018-11-25 15:41:00 +0000
commite623102768c426e86b0ed73904168006dfea2af9 (patch)
treeea5f0d95219398ff47fb0dc8ef92aa5e5173a956 /reproduce/src/make/download.mk
parent91eebe85edf38338bc4baed58d6a970c0f6b6b79 (diff)
Pipeline now downloads and uses an input dataset
In most analysis situations (except for simulations), an input dataset is necessary, but that part of the pipeline was just left out and a general `SURVEY' variable was set and never used. So with this commit, we actually use a sample FITS file from the FITS standard webpage, show it (as well as its histogram) and do some basic calculations on it. This preparation of the input datasets is done in a generic way to enable easy addition of more datasets if necessary.
Diffstat (limited to 'reproduce/src/make/download.mk')
-rw-r--r--reproduce/src/make/download.mk57
1 files changed, 44 insertions, 13 deletions
diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index 9617a45..180d2cf 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -25,20 +25,51 @@
-# Download SURVEY data
+# Download input data
# --------------------
#
-# Data from a survey (for example an imaging survey) usually have a special
-# file-name format which should be set here in the `foreach' loop. Note
-# that the `foreach' function needs the backslash (`\') at the end of the
-# line when it is broken into multiple lines.
-all-survey = $(foreach f, $(filters-survey), \
- $(SURVEY)/a-special-format-$(f).fits \
- $(SURVEY)/a-possibly-additional-$(f)-format.fits )
-$(SURVEY):; mkdir $@
-$(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir)
- flock $(lockdir)/download -c "$(DOWNLOADER) $@ $(web-survey)/$*"
+# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
+# this template pipeline we only have one dataset to enable easy
+# processing, so all the extra checks in this rule may seem
+# redundant.
+#
+# However, in a real project, you will need more than one dataset. In that
+# case, just add them to the target list and add an `elif' statement to
+# define it in the recipe.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the `flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+$(indir):; mkdir $@
+inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i))
+$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+
+ # Set the necessary parameters for this input file.
+ if [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5);
+ else
+ echo; echo; echo "Not recognized input dataset: '$*'."
+ echo; echo; exit 1
+ fi
+
+ # Download (or make the link to) the input dataset.
+ if [ -f $(INDIR)/$* ]; then
+ ln -s $(INDIR)/$* $@
+ else
+ flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$*
+ fi
+ # Check the md5 sum to see if this is the proper dataset.
+ sum=$$(md5sum $@ | awk '{print $$1}')
+ if [ $$sum != $$mdf ]; then
+ wrongname=$(dir $@)/wrong-$(notdir $@)
+ mv $@ $$wrongname
+ echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname"
+ echo; echo; exit 1
+ fi
@@ -49,5 +80,5 @@ $(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir)
#
# It is very important to mention the address where the data were
# downloaded in the final report.
-$(mtexdir)/download.tex: $(pconfdir)/web.mk | $(mtexdir)
- @echo "\\newcommand{\\websurvey}{$(web-survey)}" > $@
+$(mtexdir)/download.tex: $(pconfdir)/INPUTS.mk | $(mtexdir)
+ echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@