aboutsummaryrefslogtreecommitdiff
path: root/reproduce
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce')
-rw-r--r--reproduce/analysis/config/INPUTS.conf34
-rw-r--r--reproduce/analysis/make/download.mk26
2 files changed, 47 insertions, 13 deletions
diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf
index b1cf546..ee52abb 100644
--- a/reproduce/analysis/config/INPUTS.conf
+++ b/reproduce/analysis/config/INPUTS.conf
@@ -1,6 +1,36 @@
-# Input files necessary for this project.
+# Input files necessary for this project. The variables defined in this
+# file are primarily used in 'reproduce/analysis/make/download.mk'; see
+# that file for their precise usage. Explanatory comments are also
+# provided here.
#
-# This file is read by the configure script and running Makefiles.
+# Necessary variables for each input dataset are listed below. It's good
+# for all the variables of each file to have the same base-name (in the
+# example below 'WFPC2') with descriptive suffixes. Also put a short
+# comment above each group of variables for each dataset, briefly
+# explaining what it is.
+#
+# 1) Local file name ('WFPC2IMAGE' below): this is the name of the dataset
+# on the local system (in 'INDIR', given at configuration time). It is
+# recommended that it be the same name as the online version of the
+# file like the case here (note how this variable is used in 'WFPC2URL'
+# for the dataset's full URL). However, this is not always possible, so
+# the local and server filenames may be different. Ultimately, the file
+# name is irrelevant: we check the integrity with the checksum.
+#
+# 2) The MD5 checksum of the file ('WFPC2MD5' below): this is very
+# important for an automatic verification of the file. You can
+# calculate it by running 'md5sum' on your desired file.
+#
+# 3) The human-readable size of the file ('WFPC2SIZE' below): this is an
+# optional feature which you can use in the script that is loaded at
+# configure time ('reproduce/software/shell/configure.sh'). When
+# asking for the input-data directory, you can print some basic
+# information about the files to give users a better feeling for the
+# data volume. See that script for an example using this demo dataset.
+#
+# 4) The full dataset URL ('WFPC2URL' below): this is the full URL
+# (including the file-name) that can be used to download the dataset
+# when necessary. See also the description of the local file name above.
#
# Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk
index 8d9c164..d5f8cef 100644
--- a/reproduce/analysis/make/download.mk
+++ b/reproduce/analysis/make/download.mk
@@ -58,9 +58,9 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
# Set the necessary parameters for this input file.
if [ $* = menke20.xlsx ]; then
- origname=$(MK20DATA); fullurl=$(MK20URL); mdf=$(MK20MD5);
+ localname=$(MK20DATA); url=$(MK20URL); mdf=$(MK20MD5);
else
- echo; echo; echo "Not recognized input dataset: '$*.fits'."
+ echo; echo; echo "Not recognized input dataset: '$*'."
echo; echo; exit 1
fi
@@ -71,21 +71,25 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
# here points to the final file directly (note that `readlink' is
# part of GNU Coreutils). If it's not a link, the `readlink' part
# has no effect.
- if [ -f $(INDIR)/$$origname ]; then
- ln -fs $$(readlink -f $(INDIR)/$$origname) $$out
+ unchecked=$@.unchecked
+ if [ -f $(INDIR)/$$localname ]; then
+ ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked
else
touch $(lockdir)/download
$(downloadwrapper) "wget --no-use-server-timestamps -O" \
- $(lockdir)/download $$fullurl $@
+ $(lockdir)/download $$url $$unchecked
fi
# Check the md5 sum to see if this is the proper dataset.
- sum=$$(md5sum $@ | awk '{print $$1}')
- if [ $$sum != $$mdf ]; then
- wrongname=$(dir $@)/wrong-$(notdir $@)
- mv $@ $$wrongname
- echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
- echo; echo; exit 1
+ sum=$$(md5sum $$unchecked | awk '{print $$1}')
+ if [ $$sum = $$mdf ]; then
+ mv $$unchecked $@
+ else
+ echo; echo;
+ echo "Wrong MD5 checksum for input file '$$localname':"
+ echo " Expected MD5 checksum: $$mdf"; \
+ echo " Calculated MD5 checksum: $$sum"; \
+ echo; exit 1
fi