path: root/reproduce/analysis/make/download.mk
author     Mohammad Akhlaghi <mohammad@akhlaghi.org>    2022-05-09 13:32:47 +0200
committer  Mohammad Akhlaghi <mohammad@akhlaghi.org>    2022-05-09 23:52:29 +0200
commit     9fdeebaacd06d57c479cd69e9937c4bfe5d0a286 (patch)
tree       012e6194ad6e25a81a9c99b4d0bd0852bc9a12af /reproduce/analysis/make/download.mk
parent     480184b3da399fab11b50e67f01d2efa6bea0e3e (diff)
parent     f51b5e2e500dd6450a5a3425e85df78245fc5c5c (diff)
Imported recent updates in Maneage, conflicts fixed
Until now, Maneage had undergone some updates that had not yet been merged into this project. With this commit, those updates have been imported and the resulting conflicts have been fixed. They were all cosmetic and had no effect on the analysis. The most significant one was the change in the format of 'INPUTS.conf'. In the process, I also noticed that the IEEEtran LaTeX package is now called 'ieeetran' (the 'tlmgr' of TeXLive 2022 was failing with the old name).
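For context, the new 'INPUTS.conf' format referred to above defines one set of 'INPUT-<filename>-*' variables per dataset. The sketch below is inferred from the variable names read by the updated rule in this diff ('INPUT-menke20.xlsx-sha256', '-size' and '-url'); the values are placeholders, not the project's real checksum, size or URL:

    # Sketch of one 'INPUTS.conf' entry (placeholder values, for illustration only):
    INPUT-menke20.xlsx-size   = 1000000
    INPUT-menke20.xlsx-url    = https://example.org/menke20.xlsx
    INPUT-menke20.xlsx-sha256 = 0000000000000000000000000000000000000000000000000000000000000000

With this naming scheme, adding a new input dataset only requires adding its three variables to 'INPUTS.conf'; the download rule in this diff then picks it up automatically through the '.VARIABLES' loop described below.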
Diffstat (limited to 'reproduce/analysis/make/download.mk')
-rw-r--r--    reproduce/analysis/make/download.mk    86
1 file changed, 42 insertions(+), 44 deletions(-)
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk
index ea70fca..7110c8f 100644
--- a/reproduce/analysis/make/download.mk
+++ b/reproduce/analysis/make/download.mk
@@ -5,7 +5,7 @@
# recipes in this Makefile all use a single file lock to have one download
# script running at every instant.
#
-# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This Makefile is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,70 +27,68 @@
# Download input data
# --------------------
#
-# The input dataset properties are defined in
-# `$(pconfdir)/INPUTS.conf'. For this template we only have one dataset to
-# enable easy processing, so all the extra checks in this rule may seem
-# redundant.
+# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
+# properties. In most cases, you will not need to edit this rule (or
+# file!). Simply follow the instructions in 'INPUTS.conf' and set the
+# variable names according to the described standards.
#
-# In a real project, you will need more than one dataset. In that case,
-# just add them to the target list and add an `elif' statement to define it
-# in the recipe.
-#
-# Files in a server usually have very long names, which are mainly designed
-# for helping in data-base management and being generic. Since Make uses
-# file names to identify which rule to execute, and the scope of this
-# research project is much less than the generic survey/dataset, it is
-# easier to have a simple/short name for the input dataset and work with
-# that. In the first condition of the recipe below, we connect the short
-# name with the raw database name of the dataset.
+# TECHNICAL NOTE on the '$(foreach i, ...)' loop of 'inputdatasets': we are
+# using several (relatively complex!) features particular to Make: In GNU
+# Make, '.VARIABLES' "... expands to a list of the names of all global
+# variables defined so far" (from the "Other Special Variables" section of
+# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
+# used for input files, we find all the variables that contain the input
+# file name (the '%' is the filename). Finally, using the
+# pattern-substitution function ('patsubst'), we remove the fixed strings
+# ('INPUT-' and '-sha256') at the start and end of each variable name.
#
# Download lock file: Most systems have a single connection to the
# internet, therefore downloading is inherently done in series. As a
# result, when more than one dataset has to be downloaded, doing the
# downloads in parallel will be slower than doing them in
-# series. We thus use the `flock' program to tie/lock the downloading
+# series. We thus use the 'flock' program to tie/lock the downloading
# process with a file and make sure that only one downloading event is in
# progress at every moment.
$(indir):; mkdir $@
downloadwrapper = $(bashdir)/download-multi-try
-inputdatasets = $(indir)/menke20.xlsx
+inputdatasets = $(foreach i, \
+ $(patsubst INPUT-%-sha256,%, \
+ $(filter INPUT-%-sha256,$(.VARIABLES))), \
+ $(indir)/$(i))
$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
- # Set the necessary parameters for this input file.
- if [ $* = menke20.xlsx ]; then
- localname=$(MK20DATA); url=$(MK20URL); mdf=$(MK20MD5);
- else
- echo; echo; echo "Not recognized input dataset: '$*'."
- echo; echo; exit 1
- fi
+# Set the necessary parameters for this input file as shell variables
+# (to improve readability).
+ url=$(INPUT-$*-url)
+ sha=$(INPUT-$*-sha256)
- # Download (or make the link to) the input dataset. If the file
- # exists in `INDIR', it may be a symbolic link to some other place
- # in the filesystem. To avoid too many links when using these files
- # during processing, we'll use `readlink -f' so the link we make
- # here points to the final file directly (note that `readlink' is
- # part of GNU Coreutils). If its not a link, the `readlink' part
- # has no effect.
+# Download (or make the link to) the input dataset. If the file
+# exists in 'INDIR', it may be a symbolic link to some other place in
+# the filesystem. To avoid too many links when using these files
+# during processing, we'll use 'readlink -f' so the link we make here
+# points to the final file directly (note that 'readlink' is part of
+# GNU Coreutils). If it's not a link, the 'readlink' part has no
+# effect.
unchecked=$@.unchecked
- if [ -f $(INDIR)/$$localname ]; then
- ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked
+ if [ -f $(INDIR)/$* ]; then
+ ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked
else
touch $(lockdir)/download
$(downloadwrapper) "wget --no-use-server-timestamps -O" \
$(lockdir)/download $$url $$unchecked
fi
- # Check the md5 sum to see if this is the proper dataset.
- sum=$$(md5sum $$unchecked | awk '{print $$1}')
- if [ $$sum = $$mdf ]; then
+# Check the checksum to see if this is the proper dataset.
+ sum=$$(sha256sum $$unchecked | awk '{print $$1}')
+ if [ $$sum = $$sha ]; then
mv $$unchecked $@
echo "Integrity confirmed, using $@ in this project."
else
echo; echo;
- echo "Wrong MD5 checksum for input file '$$localname':"
+ echo "Wrong SHA256 checksum for input file '$*':"
echo " File location: $$unchecked"; \
- echo " Expected MD5 checksum: $$mdf"; \
- echo " Calculated MD5 checksum: $$sum"; \
+ echo " Expected SHA256 checksum: $$sha"; \
+ echo " Calculated SHA256 checksum: $$sum"; \
echo; exit 1
fi
@@ -104,7 +102,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
# It is very important to mention the address where the data were
# downloaded in the final report.
$(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir)
- echo "\newcommand{\menketwentyxlsxname}{$(MK20DATA)}" > $@
- echo "\newcommand{\menketwentychecksum}{$(MK20MD5)}" >> $@
- echo "\newcommand{\menketwentybytesize}{$(MK20SIZE)}" >> $@
- echo "\newcommand{\menketwentyurl}{$(MK20URL)}" >> $@
+ echo "\newcommand{\menketwentyxlsxname}{menke20.xlsx}" > $@
+ echo "\newcommand{\menketwentychecksum}{$(INPUT-menke20.xlsx-sha256)}" >> $@
+ echo "\newcommand{\menketwentybytesize}{$(INPUT-menke20.xlsx-size)}" >> $@
+ echo "\newcommand{\menketwentyurl}{$(INPUT-menke20.xlsx-url)}" >> $@