1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# Download all the necessary inputs if they are not already present.
#
# Since most systems only have one input/connection into the network,
# downloading is essentially a serial (not parallel) operation. so the
# recipes in this Makefile all use a single file lock to have one download
# script running at every instant.
#
# Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This Makefile is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This Makefile is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this Makefile. If not, see <http://www.gnu.org/licenses/>.
# Download input data
# --------------------
#
# The input dataset properties are defined in
# `$(pconfdir)/INPUTS.conf'. For this template we only have one dataset to
# enable easy processing, so all the extra checks in this rule may seem
# redundant.
#
# In a real project, you will need more than one dataset. In that case,
# just add them to the target list and add an `elif' statement to define it
# in the recipe.
#
# Files in a server usually have very long names, which are mainly designed
# for helping in data-base management and being generic. Since Make uses
# file names to identify which rule to execute, and the scope of this
# research project is much less than the generic survey/dataset, it is
# easier to have a simple/short name for the input dataset and work with
# that. In the first condition of the recipe below, we connect the short
# name with the raw database name of the dataset.
#
# Download lock file: Most systems have a single connection to the
# internet, therefore downloading is inherently done in series. As a
# result, when more than one dataset is necessary for download, if they are
# done in parallel, the speed will be slower than downloading them in
# series. We thus use the `flock' program to tie/lock the downloading
# process with a file and make sure that only one downloading event is in
# progress at every moment.
$(indir):; mkdir $@
downloadwrapper = $(bashdir)/download-multi-try
inputdatasets = $(indir)/menke20.xlsx
$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
# Set the necessary parameters for this input file.
if [ $* = menke20.xlsx ]; then
localname=$(MK20DATA); url=$(MK20URL); mdf=$(MK20MD5);
else
echo; echo; echo "Not recognized input dataset: '$*'."
echo; echo; exit 1
fi
# Download (or make the link to) the input dataset. If the file
# exists in `INDIR', it may be a symbolic link to some other place
# in the filesystem. To avoid too many links when using these files
# during processing, we'll use `readlink -f' so the link we make
# here points to the final file directly (note that `readlink' is
# part of GNU Coreutils). If its not a link, the `readlink' part
# has no effect.
unchecked=$@.unchecked
if [ -f $(INDIR)/$$localname ]; then
ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked
else
touch $(lockdir)/download
$(downloadwrapper) "wget --no-use-server-timestamps -O" \
$(lockdir)/download $$url $$unchecked
fi
# Check the md5 sum to see if this is the proper dataset.
sum=$$(md5sum $$unchecked | awk '{print $$1}')
if [ $$sum = $$mdf ]; then
mv $$unchecked $@
echo "Integrity confirmed, using $@ in this project."
else
echo; echo;
echo "Wrong MD5 checksum for input file '$$localname':"
echo " File location: $$unchecked"; \
echo " Expected MD5 checksum: $$mdf"; \
echo " Calculated MD5 checksum: $$sum"; \
echo; exit 1
fi
# Final TeX macro
# ---------------
#
# It is very important to mention the address where the data were
# downloaded in the final report.
$(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir)
echo "\newcommand{\menketwentyxlsxname}{$(MK20DATA)}" > $@
echo "\newcommand{\menketwentychecksum}{$(MK20MD5)}" >> $@
echo "\newcommand{\menketwentybytesize}{$(MK20SIZE)}" >> $@
echo "\newcommand{\menketwentyurl}{$(MK20URL)}" >> $@
|