aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make/initialize.mk
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2022-06-11 00:43:31 +0200
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2022-06-11 01:06:37 +0200
commitc148beb5eb4553711f6c75e23b94d976c40212a7 (patch)
tree9b622dabcae69a819c0bd4823ef910e84adfcc3b /reproduce/analysis/make/initialize.mk
parent502abf6870882f31b312c42773cd11779a8c72fc (diff)
IMPORTANT: download.mk removed, content moved to initialize.mk
SUMMARY: no special action should be necessary; but its an important update in low-level Maneage infra-structure (related with downloading and setting input checksums). Until now, we had a separate 'download.mk' as one of the default sub-Makefiles that should have been loaded in all the 'top-*.mk' files after 'initialize.mk'. This was due to historic reasons: until Commit 91799fe4b6d, we had to manually make some changes in 'download.mk' for every input file we defined in 'INPUTS.mk' (which was very inconvenient, and not easily possible for a large number of files!). But since Commit 91799fe4b6d, those manual changes are no longer necessary, and a normal user will hardly ever need to touch the contents of 'download.mk' (which also had one effective rule). Furthermore, based on shared projects with Zohre Ghaffari and Sepideh Eskandarlou (which involved a large number of large files), we recognized that it is very inconvenient to download a file once, update its checksum, and re-run Maneage (so the validation works). A robust solution was necesary to let project authors download the data and automatically update the checksum. With this commit, to help in high-level project management in Maneage, the single, and generic rule of 'download.mk' has been moved to 'initialize.mk', enabling us to fully remove this extra sub-Makefile from Maneage's source. Furthermore, with this commit, a usable solution to the automatic updating of the checksum has also been implemented (which has been described in the comments of 'INPUTS.conf'): the users can now set the checksum to '--auto-replace--'. In this case, the download rule (now in 'initialize.mk') will automatically update that line of 'INPUTS.conf' and add the checksum instead. After './project make' is complete, when the user runs 'git diff', they can see all the updated checksums in the source of their project and commit the updated 'INPUTS.conf' into the source so this will not be necessary later. Two other smaller issues have also been addressed in this commit: - There was an extra ',' in the call to 'filter-out' when we defined 'prepare-dep' in 'reproduce/analysis/make/prepare.mk'. This would cause a crash (with Make complaining that there is no rule for target 'initialize.mk,': notice the extra ','). With this commit, that extra ',' has been removed and the problem was solved. - The build recipe of Imfit (in 'reproduce/software/make/high-level.mk'), had two SPACE characters after '--no-openmp' which would make the reading hard. They have been updated to one SPACE.
Diffstat (limited to 'reproduce/analysis/make/initialize.mk')
-rw-r--r--reproduce/analysis/make/initialize.mk102
1 files changed, 102 insertions, 0 deletions
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 6893962..753c70a 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -434,6 +434,100 @@ dist-software:
+# Download input data
+# --------------------
+#
+# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
+# properties. In most cases, you will not need to edit this rule. Simply
+# follow the instructions of 'INPUTS.conf' and set the variables names
+# according to the described standards and everything should be fine.
+#
+# TECHNICAL NOTE on the '$(foreach, n ...)' loop of 'inputdatasets': we are
+# using several (relatively complex!) features particular to Make: In GNU
+# Make, '.VARIABLES' "... expands to a list of the names of all global
+# variables defined so far" (from the "Other Special Variables" section of
+# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
+# used for input files, we find all the variables that contain the input
+# file name (the '%' is the filename). Finally, using the
+# pattern-substitution function ('patsubst'), we remove the fixed string at
+# the start and end of the variable name.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the 'flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+$(indir):; mkdir $@
+downloadwrapper = $(bashdir)/download-multi-try
+inputdatasets = $(foreach i, \
+ $(patsubst INPUT-%-sha256,%, \
+ $(filter INPUT-%-sha256,$(.VARIABLES))), \
+ $(indir)/$(i))
+$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+
+# Set the necessary parameters for this input file as shell variables
+# (to help in readability).
+ url=$(INPUT-$*-url)
+ sha=$(INPUT-$*-sha256)
+
+# Download (or make the link to) the input dataset. If the file
+# exists in 'INDIR', it may be a symbolic link to some other place in
+# the filesystem. To avoid too many links when using these files
+# during processing, we'll use 'readlink -f' so the link we make here
+# points to the final file directly (note that 'readlink' is part of
+# GNU Coreutils). If its not a link, the 'readlink' part has no
+# effect.
+ unchecked=$@.unchecked
+ if [ -f $(INDIR)/$* ]; then
+ ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
+ else
+ touch $(lockdir)/download
+ $(downloadwrapper) "wget --no-use-server-timestamps -O" \
+ $(lockdir)/download $$url $$unchecked
+ fi
+
+# Check the checksum to see if this is the proper dataset.
+ sum=$$(sha256sum $$unchecked | awk '{print $$1}')
+ if [ $$sum = $$sha ]; then
+ mv $$unchecked $@
+ echo "Integrity confirmed, using $@ in this project."
+
+# Checksums didn't match.
+ else
+
+# The user has asked to update the checksum in 'INPUTS.conf'.
+ if [ $$sha = "--auto-replace--" ]; then
+
+# Put the updated 'INPUTS.conf' in a temporary file.
+ inputstmp=$@.inputs
+ awk '{if($$1 == "INPUT-$*-sha256") \
+ $$3="'$$sum'"; print}' \
+ $(pconfdir)/INPUTS.conf > $$inputstmp
+
+# Update the INPUTS.conf, but not in parallel (using the
+# file-lock feature of 'flock').
+ touch $(lockdir)/inputs-update
+ flock $(lockdir)/inputs-update \
+ sh -c "mv $$inputstmp $(pconfdir)/INPUTS.conf"
+ mv $$unchecked $@
+
+# Error on non-matching checksums.
+ else
+ echo; echo;
+ echo "Wrong SHA256 checksum for input file '$*':"
+ echo " File location: $$unchecked"; \
+ echo " Expected SHA256 checksum: $$sha"; \
+ echo " Calculated SHA256 checksum: $$sum"; \
+ echo; exit 1
+ fi
+ fi
+
+
+
+
+
# Directory containing to-be-published datasets
# ---------------------------------------------
#
@@ -551,3 +645,11 @@ $(mtexdir)/initialize.tex: | $(mtexdir)
fi
echo "\newcommand{\maneagedate}{$$d}" >> $@
echo "\newcommand{\maneageversion}{$$v}" >> $@
+
+# ----------------- delete the lines below this -------------------
+# These lines are only intended for the default template's output, to
+# demonstrate that is it important to put links in the PDF (for
+# showing where your input data came from). Remove these lines as
+# part of the initial customization of Maneage for your project.
+ echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" >> $@
+# ----------------- delete the lines above this -------------------