aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make/initialize.mk
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/analysis/make/initialize.mk')
-rw-r--r--reproduce/analysis/make/initialize.mk102
1 files changed, 102 insertions, 0 deletions
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 6893962..753c70a 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -434,6 +434,100 @@ dist-software:
+# Download input data
+# --------------------
+#
+# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
+# properties. In most cases, you will not need to edit this rule. Simply
+# follow the instructions of 'INPUTS.conf' and set the variable names
+# according to the described standards and everything should be fine.
+#
+# TECHNICAL NOTE on the '$(foreach i, ...)' loop of 'inputdatasets': we are
+# using several (relatively complex!) features particular to Make: In GNU
+# Make, '.VARIABLES' "... expands to a list of the names of all global
+# variables defined so far" (from the "Other Special Variables" section of
+# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
+# used for input files, we find all the variables that contain the input
+# file name (the '%' is the filename). Finally, using the
+# pattern-substitution function ('patsubst'), we remove the fixed string at
+# the start and end of the variable name.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the 'flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+
+# Build the directory that the input datasets are placed in.
+$(indir):
+	mkdir $@
+
+# Wrapper script that the download recipe calls to fetch a remote file.
+downloadwrapper = $(bashdir)/download-multi-try
+
+# One '$(indir)/NAME' path for every 'INPUT-NAME-sha256' variable that is
+# defined at the moment this variable is expanded.
+inputdatasets = $(foreach dset, \
+                  $(patsubst INPUT-%-sha256,%, \
+                    $(filter INPUT-%-sha256,$(.VARIABLES))), \
+                  $(indir)/$(dset))
+$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+
+#   Prepare one input dataset ('$*' is its file name): link it from
+#   'INDIR' when it is already there, otherwise download it, then verify
+#   its SHA256 checksum before moving it to its final name ('$@').
+#
+#   NOTE(review): this recipe keeps shell variables and multi-line 'if'
+#   blocks across recipe lines, so it needs the whole recipe to run in a
+#   single shell (presumably '.ONESHELL' is set elsewhere; confirm).
+
+#   Set the necessary parameters for this input file as shell variables
+#   (to help in readability). The values are stripped and single-quoted
+#   so that characters which are common in URLs (like '&', ';' or '?')
+#   are never interpreted by the shell.
+	url='$(strip $(INPUT-$*-url))'
+	sha='$(strip $(INPUT-$*-sha256))'
+
+#   Download (or make the link to) the input dataset. If the file
+#   exists in 'INDIR', it may be a symbolic link to some other place in
+#   the filesystem. To avoid too many links when using these files
+#   during processing, we'll use 'readlink -f' so the link we make here
+#   points to the final file directly (note that 'readlink' is part of
+#   GNU Coreutils). If it's not a link, the 'readlink' part has no
+#   effect.
+	unchecked=$@.unchecked
+	if [ -f $(INDIR)/$* ]; then
+	  ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
+	else
+	  touch $(lockdir)/download
+	  $(downloadwrapper) "wget --no-use-server-timestamps -O" \
+	                     $(lockdir)/download "$$url" $$unchecked
+	fi
+
+#   Check the checksum to see if this is the proper dataset. The
+#   operands are quoted so an empty value gives a clean mismatch instead
+#   of a syntax error from '['.
+	sum=$$(sha256sum $$unchecked | awk '{print $$1}')
+	if [ "$$sum" = "$$sha" ]; then
+	  mv $$unchecked $@
+	  echo "Integrity confirmed, using $@ in this project."
+
+#   Checksums didn't match.
+	else
+
+#   The user has asked to update the checksum in 'INPUTS.conf'.
+	  if [ "$$sha" = "--auto-replace--" ]; then
+
+#   Rewrite 'INPUTS.conf' with the calculated checksum. Reading the
+#   current file, writing the temporary file and moving it into place
+#   are all done while holding the lock (using the file-lock feature of
+#   'flock'). If only the final 'mv' were locked, two datasets being
+#   updated in parallel would both start from the same old file and the
+#   last 'mv' would silently discard the other's new checksum.
+	    inputstmp=$@.inputs
+	    touch $(lockdir)/inputs-update
+	    flock $(lockdir)/inputs-update sh -c \
+	      "awk -v name='INPUT-$*-sha256' -v sum='$$sum' \
+	           '{ if (\$$1 == name) \$$3 = sum; print }' \
+	           $(pconfdir)/INPUTS.conf > $$inputstmp \
+	       && mv $$inputstmp $(pconfdir)/INPUTS.conf"
+	    mv $$unchecked $@
+
+#   Error on non-matching checksums. The unchecked file is left in place
+#   (its location is printed) so that it can be inspected.
+	  else
+	    echo; echo;
+	    echo "Wrong SHA256 checksum for input file '$*':"
+	    echo " File location: $$unchecked"
+	    echo " Expected SHA256 checksum: $$sha"
+	    echo " Calculated SHA256 checksum: $$sum"
+	    echo; exit 1
+	  fi
+	fi
+
+
+
+
+
# Directory containing to-be-published datasets
# ---------------------------------------------
#
@@ -551,3 +645,11 @@ $(mtexdir)/initialize.tex: | $(mtexdir)
fi
echo "\newcommand{\maneagedate}{$$d}" >> $@
echo "\newcommand{\maneageversion}{$$v}" >> $@
+
+# ----------------- delete the lines below this -------------------
+# These lines are only intended for the default template's output, to
+# demonstrate that it is important to put links in the PDF (for
+# showing where your input data came from). Remove these lines as
+# part of the initial customization of Maneage for your project.
+ echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" >> $@
+# ----------------- delete the lines above this -------------------