aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make/download.mk
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/analysis/make/download.mk')
-rw-r--r--reproduce/analysis/make/download.mk86
1 files changed, 42 insertions, 44 deletions
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk
index ea70fca..7110c8f 100644
--- a/reproduce/analysis/make/download.mk
+++ b/reproduce/analysis/make/download.mk
@@ -5,7 +5,7 @@
# recipes in this Makefile all use a single file lock to have one download
# script running at every instant.
#
-# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This Makefile is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,70 +27,68 @@
# Download input data
# --------------------
#
-# The input dataset properties are defined in
-# `$(pconfdir)/INPUTS.conf'. For this template we only have one dataset to
-# enable easy processing, so all the extra checks in this rule may seem
-# redundant.
+# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
+# properties. In most cases, you will not need to edit this rule (or
+# file!). Simply follow the instructions of 'INPUTS.conf' and set the
+# variables names according to the described standards.
#
-# In a real project, you will need more than one dataset. In that case,
-# just add them to the target list and add an `elif' statement to define it
-# in the recipe.
-#
-# Files in a server usually have very long names, which are mainly designed
-# for helping in data-base management and being generic. Since Make uses
-# file names to identify which rule to execute, and the scope of this
-# research project is much less than the generic survey/dataset, it is
-# easier to have a simple/short name for the input dataset and work with
-# that. In the first condition of the recipe below, we connect the short
-# name with the raw database name of the dataset.
+# TECHNICAL NOTE on the '$(foreach, n ...)' loop of 'inputdatasets': we are
+# using several (relatively complex!) features particular to Make: In GNU
+# Make, '.VARIABLES' "... expands to a list of the names of all global
+# variables defined so far" (from the "Other Special Variables" section of
+# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
+# used for input files, we find all the variables that contain the input
+# file name (the '%' is the filename). Finally, using the
+# pattern-substitution function ('patsubst'), we remove the fixed string at
+# the start and end of the variable name.
#
# Download lock file: Most systems have a single connection to the
# internet, therefore downloading is inherently done in series. As a
# result, when more than one dataset is necessary for download, if they are
# done in parallel, the speed will be slower than downloading them in
-# series. We thus use the `flock' program to tie/lock the downloading
+# series. We thus use the 'flock' program to tie/lock the downloading
# process with a file and make sure that only one downloading event is in
# progress at every moment.
$(indir):; mkdir $@
downloadwrapper = $(bashdir)/download-multi-try
-inputdatasets = $(indir)/menke20.xlsx
+inputdatasets = $(foreach i, \
+ $(patsubst INPUT-%-sha256,%, \
+ $(filter INPUT-%-sha256,$(.VARIABLES))), \
+ $(indir)/$(i))
$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
- # Set the necessary parameters for this input file.
- if [ $* = menke20.xlsx ]; then
- localname=$(MK20DATA); url=$(MK20URL); mdf=$(MK20MD5);
- else
- echo; echo; echo "Not recognized input dataset: '$*'."
- echo; echo; exit 1
- fi
+# Set the necessary parameters for this input file as shell variables
+# (to help in readability).
+ url=$(INPUT-$*-url)
+ sha=$(INPUT-$*-sha256)
- # Download (or make the link to) the input dataset. If the file
- # exists in `INDIR', it may be a symbolic link to some other place
- # in the filesystem. To avoid too many links when using these files
- # during processing, we'll use `readlink -f' so the link we make
- # here points to the final file directly (note that `readlink' is
- # part of GNU Coreutils). If its not a link, the `readlink' part
- # has no effect.
+# Download (or make the link to) the input dataset. If the file
+# exists in 'INDIR', it may be a symbolic link to some other place in
+# the filesystem. To avoid too many links when using these files
+# during processing, we'll use 'readlink -f' so the link we make here
+# points to the final file directly (note that 'readlink' is part of
+# GNU Coreutils). If its not a link, the 'readlink' part has no
+# effect.
unchecked=$@.unchecked
- if [ -f $(INDIR)/$$localname ]; then
- ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked
+ if [ -f $(INDIR)/$* ]; then
+ ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
else
touch $(lockdir)/download
$(downloadwrapper) "wget --no-use-server-timestamps -O" \
$(lockdir)/download $$url $$unchecked
fi
- # Check the md5 sum to see if this is the proper dataset.
- sum=$$(md5sum $$unchecked | awk '{print $$1}')
- if [ $$sum = $$mdf ]; then
+# Check the checksum to see if this is the proper dataset.
+ sum=$$(sha256sum $$unchecked | awk '{print $$1}')
+ if [ $$sum = $$sha ]; then
mv $$unchecked $@
echo "Integrity confirmed, using $@ in this project."
else
echo; echo;
- echo "Wrong MD5 checksum for input file '$$localname':"
+ echo "Wrong SHA256 checksum for input file '$*':"
echo " File location: $$unchecked"; \
- echo " Expected MD5 checksum: $$mdf"; \
- echo " Calculated MD5 checksum: $$sum"; \
+ echo " Expected SHA256 checksum: $$sha"; \
+ echo " Calculated SHA256 checksum: $$sum"; \
echo; exit 1
fi
@@ -104,7 +102,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
# It is very important to mention the address where the data were
# downloaded in the final report.
$(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir)
- echo "\newcommand{\menketwentyxlsxname}{$(MK20DATA)}" > $@
- echo "\newcommand{\menketwentychecksum}{$(MK20MD5)}" >> $@
- echo "\newcommand{\menketwentybytesize}{$(MK20SIZE)}" >> $@
- echo "\newcommand{\menketwentyurl}{$(MK20URL)}" >> $@
+ echo "\newcommand{\menketwentyxlsxname}{menke20.xlsx}" > $@
+ echo "\newcommand{\menketwentychecksum}{$(INPUT-menke20.xlsx-sha256)}" >> $@
+ echo "\newcommand{\menketwentybytesize}{$(INPUT-menke20.xlsx-size)}" >> $@
+ echo "\newcommand{\menketwentyurl}{$(INPUT-menke20.xlsx-url)}" >> $@