diff options
Diffstat (limited to 'reproduce')
-rw-r--r-- | reproduce/analysis/config/INPUTS.conf | 12 | ||||
-rw-r--r-- | reproduce/analysis/make/download.mk | 105 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 102 | ||||
-rw-r--r-- | reproduce/analysis/make/prepare.mk | 2 | ||||
-rw-r--r-- | reproduce/analysis/make/top-make.mk | 1 | ||||
-rw-r--r-- | reproduce/analysis/make/top-prepare.mk | 1 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 3 | ||||
-rw-r--r-- | reproduce/software/make/high-level.mk | 6 |
8 files changed, 119 insertions, 113 deletions
diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 3958153..75e24de 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -46,6 +46,18 @@ # the URL). There are more robust checksum algorithms # like the 'SHA' standards. # +# AUTOMATIC CHECKSUM CALCULATION: In case you would like +# Maneage to find the checksum upon downloading, put the +# string '--auto-replace--' instead of a checksum. This +# can be helpful for large datasets, where downloading +# only for adding the checksum is not easy/possible and +# can be buggy. In this scenario, upon downloading the +# file its checksum will be calculated and will +# replace the '--auto-replace--' string in this file. But +# since this file is under version control, be sure to +# commit all the updated checksums after your downloads +# are finished! +# # INPUT-%-url: The URL to download the file if it is not available # locally. It can happen that during the first phases of # your project the data aren't yet public. In this case, you diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk deleted file mode 100644 index 6e67962..0000000 --- a/reproduce/analysis/make/download.mk +++ /dev/null @@ -1,105 +0,0 @@ -# Download all the necessary inputs if they are not already present. -# -# Since most systems only have one input/connection into the network, -# downloading is essentially a serial (not parallel) operation. so the -# recipes in this Makefile all use a single file lock to have one download -# script running at every instant. -# -# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> -# -# This Makefile is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This Makefile is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this Makefile. If not, see <http://www.gnu.org/licenses/>. - - - - - -# Download input data -# -------------------- -# -# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset -# properties. In most cases, you will not need to edit this rule (or -# file!). Simply follow the instructions of 'INPUTS.conf' and set the -# variables names according to the described standards. -# -# TECHNICAL NOTE on the '$(foreach, n ...)' loop of 'inputdatasets': we are -# using several (relatively complex!) features particular to Make: In GNU -# Make, '.VARIABLES' "... expands to a list of the names of all global -# variables defined so far" (from the "Other Special Variables" section of -# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only -# used for input files, we find all the variables that contain the input -# file name (the '%' is the filename). Finally, using the -# pattern-substitution function ('patsubst'), we remove the fixed string at -# the start and end of the variable name. -# -# Download lock file: Most systems have a single connection to the -# internet, therefore downloading is inherently done in series. As a -# result, when more than one dataset is necessary for download, if they are -# done in parallel, the speed will be slower than downloading them in -# series. We thus use the 'flock' program to tie/lock the downloading -# process with a file and make sure that only one downloading event is in -# progress at every moment. 
-$(indir):; mkdir $@ -downloadwrapper = $(bashdir)/download-multi-try -inputdatasets = $(foreach i, \ - $(patsubst INPUT-%-sha256,%, \ - $(filter INPUT-%-sha256,$(.VARIABLES))), \ - $(indir)/$(i)) -$(inputdatasets): $(indir)/%: | $(indir) $(lockdir) - -# Set the necessary parameters for this input file as shell variables -# (to help in readability). - url=$(INPUT-$*-url) - sha=$(INPUT-$*-sha256) - -# Download (or make the link to) the input dataset. If the file -# exists in 'INDIR', it may be a symbolic link to some other place in -# the filesystem. To avoid too many links when using these files -# during processing, we'll use 'readlink -f' so the link we make here -# points to the final file directly (note that 'readlink' is part of -# GNU Coreutils). If its not a link, the 'readlink' part has no -# effect. - unchecked=$@.unchecked - if [ -f $(INDIR)/$* ]; then - ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked - else - touch $(lockdir)/download - $(downloadwrapper) "wget --no-use-server-timestamps -O" \ - $(lockdir)/download $$url $$unchecked - fi - -# Check the checksum to see if this is the proper dataset. - sum=$$(sha256sum $$unchecked | awk '{print $$1}') - if [ $$sum = $$sha ]; then - mv $$unchecked $@ - echo "Integrity confirmed, using $@ in this project." - else - echo; echo; - echo "Wrong SHA256 checksum for input file '$*':" - echo " File location: $$unchecked"; \ - echo " Expected SHA256 checksum: $$sha"; \ - echo " Calculated SHA256 checksum: $$sum"; \ - echo; exit 1 - fi - - - - - -# Final TeX macro -# --------------- -# -# It is very important to mention the address where the data were -# downloaded in the final report. 
-$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir) - echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" > $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 6893962..753c70a 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -434,6 +434,100 @@ dist-software: +# Download input data +# -------------------- +# +# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset +# properties. In most cases, you will not need to edit this rule. Simply +# follow the instructions of 'INPUTS.conf' and set the variable names +# according to the described standards and everything should be fine. +# +# TECHNICAL NOTE on the '$(foreach i, ...)' loop of 'inputdatasets': we are +# using several (relatively complex!) features particular to Make: In GNU +# Make, '.VARIABLES' "... expands to a list of the names of all global +# variables defined so far" (from the "Other Special Variables" section of +# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only +# used for input files, we find all the variables that contain the input +# file name (the '%' is the filename). Finally, using the +# pattern-substitution function ('patsubst'), we remove the fixed string at +# the start and end of the variable name. +# +# Download lock file: Most systems have a single connection to the +# internet, therefore downloading is inherently done in series. As a +# result, when more than one dataset is necessary for download, if they are +# done in parallel, the speed will be slower than downloading them in +# series. We thus use the 'flock' program to tie/lock the downloading +# process with a file and make sure that only one downloading event is in +# progress at every moment. 
+$(indir):; mkdir $@ +downloadwrapper = $(bashdir)/download-multi-try +inputdatasets = $(foreach i, \ + $(patsubst INPUT-%-sha256,%, \ + $(filter INPUT-%-sha256,$(.VARIABLES))), \ + $(indir)/$(i)) +$(inputdatasets): $(indir)/%: | $(indir) $(lockdir) + +# Set the necessary parameters for this input file as shell variables +# (to help in readability). + url=$(INPUT-$*-url) + sha=$(INPUT-$*-sha256) + +# Download (or make the link to) the input dataset. If the file +# exists in 'INDIR', it may be a symbolic link to some other place in +# the filesystem. To avoid too many links when using these files +# during processing, we'll use 'readlink -f' so the link we make here +# points to the final file directly (note that 'readlink' is part of +# GNU Coreutils). If it's not a link, the 'readlink' part has no +# effect. + unchecked=$@.unchecked + if [ -f $(INDIR)/$* ]; then + ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked + else + touch $(lockdir)/download + $(downloadwrapper) "wget --no-use-server-timestamps -O" \ + $(lockdir)/download $$url $$unchecked + fi + +# Check the checksum to see if this is the proper dataset. + sum=$$(sha256sum $$unchecked | awk '{print $$1}') + if [ $$sum = $$sha ]; then + mv $$unchecked $@ + echo "Integrity confirmed, using $@ in this project." + +# Checksums didn't match. + else + +# The user has asked to update the checksum in 'INPUTS.conf'. + if [ $$sha = "--auto-replace--" ]; then + +# Put the updated 'INPUTS.conf' in a temporary file. + inputstmp=$@.inputs + awk '{if($$1 == "INPUT-$*-sha256") \ + $$3="'$$sum'"; print}' \ + $(pconfdir)/INPUTS.conf > $$inputstmp + +# Update the INPUTS.conf, but not in parallel (using the +# file-lock feature of 'flock'). + touch $(lockdir)/inputs-update + flock $(lockdir)/inputs-update \ + sh -c "mv $$inputstmp $(pconfdir)/INPUTS.conf" + mv $$unchecked $@ + +# Error on non-matching checksums. 
+ else + echo; echo; + echo "Wrong SHA256 checksum for input file '$*':" + echo " File location: $$unchecked"; \ + echo " Expected SHA256 checksum: $$sha"; \ + echo " Calculated SHA256 checksum: $$sum"; \ + echo; exit 1 + fi + fi + + + + + # Directory containing to-be-published datasets # --------------------------------------------- # @@ -551,3 +645,11 @@ $(mtexdir)/initialize.tex: | $(mtexdir) fi echo "\newcommand{\maneagedate}{$$d}" >> $@ echo "\newcommand{\maneageversion}{$$v}" >> $@ + +# ----------------- delete the lines below this ------------------- +# These lines are only intended for the default template's output, to +# demonstrate that it is important to put links in the PDF (for +# showing where your input data came from). Remove these lines as +# part of the initial customization of Maneage for your project. + echo "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" >> $@ +# ----------------- delete the lines above this ------------------- diff --git a/reproduce/analysis/make/prepare.mk b/reproduce/analysis/make/prepare.mk index aed2b5f..92b57b6 100644 --- a/reproduce/analysis/make/prepare.mk +++ b/reproduce/analysis/make/prepare.mk @@ -24,7 +24,7 @@ # Without this file, './project make' won't work. # # We need to remove the 'prepare' word from the list of 'makesrc'. -prepare-dep = $(filter-out prepare, ,$(makesrc)) +prepare-dep = $(filter-out prepare, $(makesrc)) $(bsdir)/preparation-done.mk: \ $(foreach s, $(prepare-dep), $(mtexdir)/$(s).tex) diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk index 4e95c54..d6e3822 100644 --- a/reproduce/analysis/make/top-make.mk +++ b/reproduce/analysis/make/top-make.mk @@ -111,7 +111,6 @@ endif # the same order that they are defined here (we aren't just using a # wild-card like the configuration Makefiles). 
makesrc = initialize \ - download \ delete-me \ verify \ paper diff --git a/reproduce/analysis/make/top-prepare.mk b/reproduce/analysis/make/top-prepare.mk index 3950bf1..28dfc4a 100644 --- a/reproduce/analysis/make/top-prepare.mk +++ b/reproduce/analysis/make/top-prepare.mk @@ -64,7 +64,6 @@ endif # './project prepare' and './project make' will first read 'initialize.mk' # and 'downloads.mk'. makesrc = initialize \ - download \ prepare diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index a645713..25b3bec 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -139,8 +139,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=49e4e9f049aa9da0453a67203d798587 - elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 + if [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 else echo; echo "'$$m' not recognized."; exit 1 fi $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp) diff --git a/reproduce/software/make/high-level.mk b/reproduce/software/make/high-level.mk index 4ca6e89..9c5cd31 100644 --- a/reproduce/software/make/high-level.mk +++ b/reproduce/software/make/high-level.mk @@ -1204,17 +1204,17 @@ $(ibidir)/imfit-$(imfit-version): \ sed -i 's|/usr/local|$(idir)|g' SConstruct sed -i 's|/usr/include|$(idir)/include|g' SConstruct sed -i 's|.append(|.insert(0,|g' SConstruct - scons --no-openmp --no-nlopt \ + scons --no-openmp --no-nlopt \ --cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \ --header-path=$(idir)/include $$headerpath \ --lib-path=$(idir)/lib imfit cp imfit $(ibdir) - scons --no-openmp --no-nlopt \ + scons --no-openmp --no-nlopt \ --cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \ --header-path=$(idir)/include $$headerpath \ --lib-path=$(idir)/lib imfit-mcmc cp imfit-mcmc $(ibdir) - scons --no-openmp --no-nlopt \ 
+ scons --no-openmp --no-nlopt \ --cc=$(ibdir)/gcc --cpp=$(ibdir)/g++ \ --header-path=$(idir)/include $$headerpath \ --lib-path=$(idir)/lib makeimage |