diff options
-rw-r--r-- | reproduce/analysis/config/INPUTS.conf | 42 | ||||
-rw-r--r-- | reproduce/analysis/make/download.mk | 24 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 2 |
3 files changed, 54 insertions, 14 deletions
diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 6ddaec7..5e6c425 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -1,6 +1,36 @@ -# Input files necessary for this project. +# Input files necessary for this project, the variables defined in this +# file are primarily used in 'reproduce/analysis/make/download.mk'. See +# there for precise usage of the variables. But comments are also provided +# here. # -# This file is read by the configure script and running Makefiles. +# Necessary variables for each input dataset are listed below. It's good +# that all the variables of each file have the same base-name (in the +# example below 'WFPC2') with descriptive suffixes, also put a short +# comment above each group of variables for each dataset, briefly +# explaining what it is. +# +# 1) Local file name ('WFPC2IMAGE' below): this is the name of the dataset +# on the local system (in 'INDIR', given at configuration time). It is +# recommended that it be the same name as the online version of the +# file like the case here (note how this variable is used in 'WFPC2URL' +# for the dataset's full URL). However, this is not always possible, so +# the local and server filenames may be different. Ultimately, the file +# name is irrelevant, we check the integrity with the checksum. +# +# 2) The MD5 checksum of the file ('WFPC2MD5' below): this is very +# important for an automatic verification of the file. You can +# calculate it by running 'md5sum' on your desired file. +# +# 3) The human-readable size of the file ('WFPC2SIZE' below): this is an +# optional feature which you can use in the script that is loaded +# at configure time ('reproduce/software/shell/configure.sh'). When +# asking for the input-data directory, you can print some basic +# information of the files for users to get a better feeling of the +# volume. See that script for an example using this demo dataset. 
+# +# 4) The full dataset URL ('WFPC2URL' below): this is the full URL +# (including the file-name) that can be used to download the dataset +# when necessary. Also, see the description above on local filename. # # Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> # @@ -9,7 +39,13 @@ # this notice are preserved. This file is offered as-is, without any # warranty. + + + + +# Demonstration image used in the histogram plot (remove this when +# customizing). WFPC2IMAGE = WFPC2ASSNu5780205bx.fits WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 WFPC2SIZE = 62kb -WFPC2URL = https://fits.gsfc.nasa.gov/samples +WFPC2URL = https://fits.gsfc.nasa.gov/samples/$(WFPC2IMAGE) diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index 71ee7d3..bc8b8ce 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -58,7 +58,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # Set the necessary parameters for this input file. if [ $* = wfpc2 ]; then - origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + localname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); else echo; echo; echo "Not recognized input dataset: '$*.fits'." echo; echo; exit 1 @@ -71,21 +71,25 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # here points to the final file directly (note that `readlink' is # part of GNU Coreutils). If its not a link, the `readlink' part # has no effect. - if [ -f $(INDIR)/$$origname ]; then - ln -fs $$(readlink -f $(INDIR)/$$origname) $$out + unchecked=$@.unchecked + if [ -f $(INDIR)/$$localname ]; then + ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked else touch $(lockdir)/download $(downloadwrapper) "wget --no-use-server-timestamps -O" \ - $(lockdir)/download $$url/$$origname $@ + $(lockdir)/download $$url $$unchecked fi # Check the md5 sum to see if this is the proper dataset. 
- sum=$$(md5sum $@ | awk '{print $$1}') - if [ $$sum != $$mdf ]; then - wrongname=$(dir $@)/wrong-$(notdir $@) - mv $@ $$wrongname - echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname" - echo; echo; exit 1 + sum=$$(md5sum $$unchecked | awk '{print $$1}') + if [ $$sum = $$mdf ]; then + mv $$unchecked $@ + else + echo; echo; + echo "Wrong MD5 checksum for input file '$$localname':" + echo " Expected MD5 checksum: $$mdf"; \ + echo " Calculated MD5 checksum: $$sum"; \ + echo; exit 1 fi diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 67b3fea..69711d5 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -135,7 +135,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=6749e17ce606d57d30cebdbc1a5d23ad + if [ $$m == download ]; then s=49e4e9f049aa9da0453a67203d798587 elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 else echo; echo "'$$m' not recognized."; exit 1 fi |