aboutsummaryrefslogtreecommitdiff
path: root/reproduce
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2022-06-11 00:43:31 +0200
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2022-06-11 01:06:37 +0200
commitc148beb5eb4553711f6c75e23b94d976c40212a7 (patch)
tree9b622dabcae69a819c0bd4823ef910e84adfcc3b /reproduce
parent502abf6870882f31b312c42773cd11779a8c72fc (diff)
IMPORTANT: download.mk removed, content moved to initialize.mk
SUMMARY: no special action should be necessary; but it's an important update in low-level Maneage infrastructure (related to downloading and setting input checksums). Until now, we had a separate 'download.mk' as one of the default sub-Makefiles that should have been loaded in all the 'top-*.mk' files after 'initialize.mk'. This was due to historic reasons: until Commit 91799fe4b6d, we had to manually make some changes in 'download.mk' for every input file we defined in 'INPUTS.mk' (which was very inconvenient, and not easily possible for a large number of files!). But since Commit 91799fe4b6d, those manual changes are no longer necessary, and a normal user will hardly ever need to touch the contents of 'download.mk' (which also had only one effective rule). Furthermore, based on shared projects with Zohre Ghaffari and Sepideh Eskandarlou (which involved a large number of large files), we recognized that it is very inconvenient to download a file once, update its checksum, and re-run Maneage (so the validation works). A robust solution was necessary to let project authors download the data and automatically update the checksum. With this commit, to help in high-level project management in Maneage, the single, generic rule of 'download.mk' has been moved to 'initialize.mk', enabling us to fully remove this extra sub-Makefile from Maneage's source. Furthermore, with this commit, a usable solution to the automatic updating of the checksum has also been implemented (which has been described in the comments of 'INPUTS.conf'): the users can now set the checksum to '--auto-replace--'. In this case, the download rule (now in 'initialize.mk') will automatically update that line of 'INPUTS.conf' and add the checksum instead. After './project make' is complete, when the user runs 'git diff', they can see all the updated checksums in the source of their project and commit the updated 'INPUTS.conf' into the source so this will not be necessary later.
Two other smaller issues have also been addressed in this commit: - There was an extra ',' in the call to 'filter-out' when we defined 'prepare-dep' in 'reproduce/analysis/make/prepare.mk'. This would cause a crash (with Make complaining that there is no rule for target 'initialize.mk,': notice the extra ','). With this commit, that extra ',' has been removed and the problem was solved. - The build recipe of Imfit (in 'reproduce/software/make/high-level.mk'), had two SPACE characters after '--no-openmp' which would make the reading hard. They have been updated to one SPACE.
Diffstat (limited to 'reproduce')
-rw-r--r--reproduce/analysis/config/INPUTS.conf12
-rw-r--r--reproduce/analysis/make/download.mk105
-rw-r--r--reproduce/analysis/make/initialize.mk102
-rw-r--r--reproduce/analysis/make/prepare.mk2
-rw-r--r--reproduce/analysis/make/top-make.mk1
-rw-r--r--reproduce/analysis/make/top-prepare.mk1
-rw-r--r--reproduce/analysis/make/verify.mk3
-rw-r--r--reproduce/software/make/high-level.mk6
8 files changed, 119 insertions, 113 deletions
diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf
index 3958153..75e24de 100644
--- a/reproduce/analysis/config/INPUTS.conf
+++ b/reproduce/analysis/config/INPUTS.conf
@@ -46,6 +46,18 @@
# the URL). There are more robust checksum algorithms
# like the 'SHA' standards.
#
+# AUTOMATIC CHECKSUM CALCULATION: In case you would like
+# Maneage to find the checksum upon downloading, put the
+# string '--auto-replace--' instead of a checksum. This
+# can be helpful for large datasets; where downloading
+# only for adding the checksum is not easy/possible and
+# can be buggy. In this scenario, upon downloading the
+# file its checksum will be calculated and will replace
+# the '--auto-replace--' string in this file. But since
+# this file is under version control, be sure to commit
+# all the updated checksums after your downloads are
+# finished!
+#
# INPUT-%-url: The URL to download the file if it is not available
# locally. It can happen that during the first phases of
# your project the data aren't yet public. In this case, you
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk
deleted file mode 100644
index 6e67962..0000000
--- a/reproduce/analysis/make/download.mk
+++ /dev/null
@@ -1,105 +0,0 @@
-# Download all the necessary inputs if they are not already present.
-#
-# Since most systems only have one input/connection into the network,
-# downloading is essentially a serial (not parallel) operation. so the
-# recipes in this Makefile all use a single file lock to have one download
-# script running at every instant.
-#
-# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org>
-#
-# This Makefile is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This Makefile is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this Makefile. If not, see <http://www.gnu.org/licenses/>.
-
-
-
-
-
-# Download input data
-# --------------------
-#
-# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
-# properties. In most cases, you will not need to edit this rule (or
-# file!). Simply follow the instructions of 'INPUTS.conf' and set the
-# variables names according to the described standards.
-#
-# TECHNICAL NOTE on the '$(foreach, n ...)' loop of 'inputdatasets': we are
-# using several (relatively complex!) features particular to Make: In GNU
-# Make, '.VARIABLES' "... expands to a list of the names of all global
-# variables defined so far" (from the "Other Special Variables" section of
-# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
-# used for input files, we find all the variables that contain the input
-# file name (the '%' is the filename). Finally, using the
-# pattern-substitution function ('patsubst'), we remove the fixed string at
-# the start and end of the variable name.
-#
-# Download lock file: Most systems have a single connection to the
-# internet, therefore downloading is inherently done in series. As a
-# result, when more than one dataset is necessary for download, if they are
-# done in parallel, the speed will be slower than downloading them in
-# series. We thus use the 'flock' program to tie/lock the downloading
-# process with a file and make sure that only one downloading event is in
-# progress at every moment.
-$(indir):; mkdir $@
-downloadwrapper = $(bashdir)/download-multi-try
-inputdatasets = $(foreach i, \
- $(patsubst INPUT-%-sha256,%, \
- $(filter INPUT-%-sha256,$(.VARIABLES))), \
- $(indir)/$(i))
-$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
-
-# Set the necessary parameters for this input file as shell variables
-# (to help in readability).
- url=$(INPUT-$*-url)
- sha=$(INPUT-$*-sha256)
-
-# Download (or make the link to) the input dataset. If the file
-# exists in 'INDIR', it may be a symbolic link to some other place in
-# the filesystem. To avoid too many links when using these files
-# during processing, we'll use 'readlink -f' so the link we make here
-# points to the final file directly (note that 'readlink' is part of
-# GNU Coreutils). If its not a link, the 'readlink' part has no
-# effect.
- unchecked=$@.unchecked
- if [ -f $(INDIR)/$* ]; then
- ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
- else
- touch $(lockdir)/download
- $(downloadwrapper) "wget --no-use-server-timestamps -O" \
- $(lockdir)/download $$url $$unchecked
- fi
-
-# Check the checksum to see if this is the proper dataset.
- sum=$$(sha256sum $$unchecked | awk '{print $$1}')
- if [ $$sum = $$sha ]; then
- mv $$unchecked $@
- echo "Integrity confirmed, using $@ in this project."
- else
- echo; echo;
- echo "Wrong SHA256 checksum for input file '$*':"
- echo " File location: $$unchecked"; \
- echo " Expected SHA256 checksum: $$sha"; \
- echo " Calculated SHA256 checksum: $$sum"; \
- echo; exit 1
- fi
-
-
-
-
-
-# Final TeX macro
-# ---------------
-#
-# It is very important to mention the address where the data were
-# downloaded in the final report.
-$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir)
- echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" > $@
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 6893962..753c70a 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -434,6 +434,100 @@ dist-software:
+# Download input data
+# --------------------
+#
+# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
+# properties. In most cases, you will not need to edit this rule. Simply
+# follow the instructions of 'INPUTS.conf' and set the variables names
+# according to the described standards and everything should be fine.
+#
+# TECHNICAL NOTE on the '$(foreach i, ...)' loop of 'inputdatasets': we are
+# using several (relatively complex!) features particular to Make: In GNU
+# Make, '.VARIABLES' "... expands to a list of the names of all global
+# variables defined so far" (from the "Other Special Variables" section of
+# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
+# used for input files, we find all the variables that contain the input
+# file name (the '%' is the filename). Finally, using the
+# pattern-substitution function ('patsubst'), we remove the fixed string at
+# the start and end of the variable name.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the 'flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+$(indir):; mkdir $@
+downloadwrapper = $(bashdir)/download-multi-try
+inputdatasets = $(foreach i, \
+ $(patsubst INPUT-%-sha256,%, \
+ $(filter INPUT-%-sha256,$(.VARIABLES))), \
+ $(indir)/$(i))
+$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+
+# Set the necessary parameters for this input file as shell variables
+# (to help in readability).
+ url=$(INPUT-$*-url)
+ sha=$(INPUT-$*-sha256)
+
+# Download (or make the link to) the input dataset. If the file
+# exists in 'INDIR', it may be a symbolic link to some other place in
+# the filesystem. To avoid too many links when using these files
+# during processing, we'll use 'readlink -f' so the link we make here
+# points to the final file directly (note that 'readlink' is part of
+# GNU Coreutils). If it's not a link, the 'readlink' part has no
+# effect.
+ unchecked=$@.unchecked
+ if [ -f $(INDIR)/$* ]; then
+ ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
+ else
+ touch $(lockdir)/download
+ $(downloadwrapper) "wget --no-use-server-timestamps -O" \
+ $(lockdir)/download $$url $$unchecked
+ fi
+
+# Check the checksum to see if this is the proper dataset. The two
+# operands are quoted so that an empty value is compared as an empty
+# string (a mismatch), instead of being a syntax error in '['.
+ sum=$$(sha256sum $$unchecked | awk '{print $$1}')
+ if [ "$$sum" = "$$sha" ]; then
+ mv $$unchecked $@
+ echo "Integrity confirmed, using $@ in this project."
+
+# Checksums didn't match.
+ else
+
+# The user has asked to update the checksum in 'INPUTS.conf'.
+ if [ "$$sha" = "--auto-replace--" ]; then
+
+# Update 'INPUTS.conf' through a temporary file. Both the reading
+# of the current 'INPUTS.conf' (by AWK) and the moving of the new
+# version over it are done under one file-lock (using 'flock'):
+# when several inputs are processed in parallel, each update has
+# to start from the result of the previous one, otherwise the last
+# 'mv' would discard the checksums written by the others. The
+# backslash-escaped '$' and '"' are for the inner 'sh -c' shell
+# (so they reach AWK untouched); '$$sum' is expanded by the outer
+# shell.
+ inputstmp=$@.inputs
+ touch $(lockdir)/inputs-update
+ flock $(lockdir)/inputs-update sh -c \
+ "awk '{if(\$$1 == \"INPUT-$*-sha256\") \
+ \$$3=\"$$sum\"; print}' \
+ $(pconfdir)/INPUTS.conf > $$inputstmp \
+ && mv $$inputstmp $(pconfdir)/INPUTS.conf"
+ mv $$unchecked $@
+
+# Error on non-matching checksums.
+ else
+ echo; echo;
+ echo "Wrong SHA256 checksum for input file '$*':"
+ echo " File location: $$unchecked"; \
+ echo " Expected SHA256 checksum: $$sha"; \
+ echo " Calculated SHA256 checksum: $$sum"; \
+ echo; exit 1
+ fi
+ fi
+
+
+
+
+
# Directory containing to-be-published datasets
# ---------------------------------------------
#
@@ -551,3 +645,11 @@ $(mtexdir)/initialize.tex: | $(mtexdir)
fi
echo "\newcommand{\maneagedate}{$$d}" >> $@
echo "\newcommand{\maneageversion}{$$v}" >> $@
+
+# ----------------- delete the lines below this -------------------
+# These lines are only intended for the default template's output, to
+# demonstrate that it is important to put links in the PDF (for
+# showing where your input data came from). Remove these lines as
+# part of the initial customization of Maneage for your project.
+ echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" >> $@
+# ----------------- delete the lines above this -------------------
diff --git a/reproduce/analysis/make/prepare.mk b/reproduce/analysis/make/prepare.mk
index aed2b5f..92b57b6 100644
--- a/reproduce/analysis/make/prepare.mk
+++ b/reproduce/analysis/make/prepare.mk
@@ -24,7 +24,7 @@
# Without this file, './project make' won't work.
#
# We need to remove the 'prepare' word from the list of 'makesrc'.
-prepare-dep = $(filter-out prepare, ,$(makesrc))
+prepare-dep = $(filter-out prepare, $(makesrc))
$(bsdir)/preparation-done.mk: \
$(foreach s, $(prepare-dep), $(mtexdir)/$(s).tex)
diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk
index 4e95c54..d6e3822 100644
--- a/reproduce/analysis/make/top-make.mk
+++ b/reproduce/analysis/make/top-make.mk
@@ -111,7 +111,6 @@ endif
# the same order that they are defined here (we aren't just using a
# wild-card like the configuration Makefiles).
makesrc = initialize \
- download \
delete-me \
verify \
paper
diff --git a/reproduce/analysis/make/top-prepare.mk b/reproduce/analysis/make/top-prepare.mk
index 3950bf1..28dfc4a 100644
--- a/reproduce/analysis/make/top-prepare.mk
+++ b/reproduce/analysis/make/top-prepare.mk
@@ -64,7 +64,6 @@ endif
# './project prepare' and './project make' will first read 'initialize.mk'
# and 'downloads.mk'.
makesrc = initialize \
- download \
prepare
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index a645713..25b3bec 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -139,8 +139,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Verify TeX macros (the values that go into the PDF text).
for m in $(verify-check); do
file=$(mtexdir)/$$m.tex
- if [ $$m == download ]; then s=49e4e9f049aa9da0453a67203d798587
- elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705
+ if [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705
else echo; echo "'$$m' not recognized."; exit 1
fi
$(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)
diff --git a/reproduce/software/make/high-level.mk b/reproduce/software/make/high-level.mk
index 4ca6e89..9c5cd31 100644
--- a/reproduce/software/make/high-level.mk
+++ b/reproduce/software/make/high-level.mk
@@ -1204,17 +1204,17 @@ $(ibidir)/imfit-$(imfit-version): \
sed -i 's|/usr/local|$(idir)|g' SConstruct
sed -i 's|/usr/include|$(idir)/include|g' SConstruct
sed -i 's|.append(|.insert(0,|g' SConstruct
- scons --no-openmp --no-nlopt \
+ scons --no-openmp --no-nlopt \
--cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \
--header-path=$(idir)/include $$headerpath \
--lib-path=$(idir)/lib imfit
cp imfit $(ibdir)
- scons --no-openmp --no-nlopt \
+ scons --no-openmp --no-nlopt \
--cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \
--header-path=$(idir)/include $$headerpath \
--lib-path=$(idir)/lib imfit-mcmc
cp imfit-mcmc $(ibdir)
- scons --no-openmp --no-nlopt \
+ scons --no-openmp --no-nlopt \
--cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \
--header-path=$(idir)/include $$headerpath \
--lib-path=$(idir)/lib makeimage