From 2bd2e2f1833300d5102339e2aa417a3099c13960 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Tue, 9 Jun 2020 20:09:09 +0100 Subject: IMPORTANT: bug fix in default data download script of download.mk Summary of possible semantic conflicts 1. The recipe to download input datasets has been modified. You have to rename the old 'origname' variable to 'localname' (to avoid confusion) and the default dataset URL should now be complete (including the actual filename). See the newly added descriptions in 'INPUTS.conf' for more on this. Until now, when the dataset was already present on the host system, a link couldn't be made to it, causing the project to crash in the checksum phase. This has been fixed by properly naming the main variable as 'localname' to avoid the confusion that caused it. Some other problems have been fixed in this recipe in the meantime: - When the checksum is different, the expected and calculated checksums are printed. - In the default paper, we now print the full URL of the dataset, not just the server, so the checksum of the 'download.tex' step has been updated. --- reproduce/analysis/config/INPUTS.conf | 42 ++++++++++++++++++++++++++++++++--- reproduce/analysis/make/download.mk | 24 +++++++++++--------- reproduce/analysis/make/verify.mk | 2 +- 3 files changed, 54 insertions(+), 14 deletions(-) (limited to 'reproduce') diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 6ddaec7..5e6c425 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -1,6 +1,36 @@ -# Input files necessary for this project. +# Input files necessary for this project. The variables defined in this +# file are primarily used in 'reproduce/analysis/make/download.mk'. See +# there for precise usage of the variables. But comments are also provided +# here. # -# This file is read by the configure script and running Makefiles. 
+# Necessary variables for each input dataset are listed below. It's good +# that all the variables of each file have the same base-name (in the +# example below 'WFPC2') with descriptive suffixes. Also put a short +# comment above each group of variables for each dataset, briefly +# explaining what it is. +# +# 1) Local file name ('WFPC2IMAGE' below): this is the name of the dataset +# on the local system (in 'INDIR', given at configuration time). It is +# recommended that it be the same name as the online version of the +# file like the case here (note how this variable is used in 'WFPC2URL' +# for the dataset's full URL). However, this is not always possible, so +# the local and server filenames may be different. Ultimately, the file +# name is irrelevant; we check the integrity with the checksum. +# +# 2) The MD5 checksum of the file ('WFPC2MD5' below): this is very +# important for an automatic verification of the file. You can +# calculate it by running 'md5sum' on your desired file. +# +# 3) The human-readable size of the file ('WFPC2SIZE' below): this is an +# optional feature which you can use in the script that is loaded +# at configure time ('reproduce/software/shell/configure.sh'). When +# asking for the input-data directory, you can print some basic +# information of the files for users to get a better feeling of the +# volume. See that script for an example using this demo dataset. +# +# 4) The full dataset URL ('WFPC2URL' below): this is the full URL +# (including the file-name) that can be used to download the dataset +# when necessary. Also, see the description above on local filename. # # Copyright (C) 2018-2020 Mohammad Akhlaghi # @@ -9,7 +39,13 @@ # this notice are preserved. This file is offered as-is, without any # warranty. + + + + +# Demonstration image used in the histogram plot (remove this when +# customizing). 
WFPC2IMAGE = WFPC2ASSNu5780205bx.fits WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 WFPC2SIZE = 62kb -WFPC2URL = https://fits.gsfc.nasa.gov/samples +WFPC2URL = https://fits.gsfc.nasa.gov/samples/$(WFPC2IMAGE) diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index 71ee7d3..bc8b8ce 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -58,7 +58,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # Set the necessary parameters for this input file. if [ $* = wfpc2 ]; then - origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + localname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); else echo; echo; echo "Not recognized input dataset: '$*.fits'." echo; echo; exit 1 @@ -71,21 +71,25 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # here points to the final file directly (note that `readlink' is # part of GNU Coreutils). If its not a link, the `readlink' part # has no effect. - if [ -f $(INDIR)/$$origname ]; then - ln -fs $$(readlink -f $(INDIR)/$$origname) $$out + unchecked=$@.unchecked + if [ -f $(INDIR)/$$localname ]; then + ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked else touch $(lockdir)/download $(downloadwrapper) "wget --no-use-server-timestamps -O" \ - $(lockdir)/download $$url/$$origname $@ + $(lockdir)/download $$url $$unchecked fi # Check the md5 sum to see if this is the proper dataset. 
- sum=$$(md5sum $@ | awk '{print $$1}') - if [ $$sum != $$mdf ]; then - wrongname=$(dir $@)/wrong-$(notdir $@) - mv $@ $$wrongname - echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname" - echo; echo; exit 1 + sum=$$(md5sum $$unchecked | awk '{print $$1}') + if [ $$sum = $$mdf ]; then + mv $$unchecked $@ + else + echo; echo; + echo "Wrong MD5 checksum for input file '$$localname':" + echo " Expected MD5 checksum: $$mdf"; \ + echo " Calculated MD5 checksum: $$sum"; \ + echo; exit 1 fi diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 67b3fea..69711d5 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -135,7 +135,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=6749e17ce606d57d30cebdbc1a5d23ad + if [ $$m == download ]; then s=49e4e9f049aa9da0453a67203d798587 elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 else echo; echo "'$$m' not recognized."; exit 1 fi -- cgit v1.2.1