diff options
Diffstat (limited to 'reproduce/analysis/make/download.mk')
-rw-r--r-- | reproduce/analysis/make/download.mk | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk new file mode 100644 index 0000000..a721863 --- /dev/null +++ b/reproduce/analysis/make/download.mk @@ -0,0 +1,91 @@ +# Download all the necessary inputs if they are not already present. +# +# Since most systems only have one input/connection into the network, +# downloading is essentially a serial (not parallel) operation. so the +# recipes in this Makefile all use a single file lock to have one download +# script running at every instant. +# +# Copyright (C) 2018-2019 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + + +# Download input data +# -------------------- +# +# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For +# this template we only have one dataset to enable easy processing, so all +# the extra checks in this rule may seem redundant. +# +# In a real project, you will need more than one dataset. In that case, +# just add them to the target list and add an `elif' statement to define it +# in the recipe. +# +# Files in a server usually have very long names, which are mainly designed +# for helping in data-base management and being generic. Since Make uses +# file names to identify which rule to execute, and the scope of this +# research project is much less than the generic survey/dataset, it is +# easier to have a simple/short name for the input dataset and work with +# that. In the first condition of the recipe below, we connect the short +# name with the raw database name of the dataset. +# +# Download lock file: Most systems have a single connection to the +# internet, therefore downloading is inherently done in series. As a +# result, when more than one dataset is necessary for download, if they are +# done in parallel, the speed will be slower than downloading them in +# series. We thus use the `flock' program to tie/lock the downloading +# process with a file and make sure that only one downloading event is in +# progress at every moment. +$(indir):; mkdir $@ +downloadwrapper = $(bashdir)/download-multi-try +inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits) +$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) + + # Set the necessary parameters for this input file. + if [ $* = wfpc2 ]; then + origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + else + echo; echo; echo "Not recognized input dataset: '$*.fits'." + echo; echo; exit 1 + fi + + # Download (or make the link to) the input dataset. + if [ -f $(INDIR)/$$origname ]; then + ln -s $(INDIR)/$$origname $@ + else + touch $(lockdir)/download + $(downloadwrapper) "wget --no-use-server-timestamps -O" \ + $(lockdir)/download $$url/$$origname $@ + fi + + # Check the md5 sum to see if this is the proper dataset. + sum=$$(md5sum $@ | awk '{print $$1}') + if [ $$sum != $$mdf ]; then + wrongname=$(dir $@)/wrong-$(notdir $@) + mv $@ $$wrongname + echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname" + echo; echo; exit 1 + fi + + + + + +# Final TeX macro +# --------------- +# +# It is very important to mention the address where the data were +# downloaded in the final report. +$(mtexdir)/download.tex: $(pconfdir)/INPUTS.mk | $(mtexdir) + echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@ |