aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make/download.mk
blob: 6e6796276605fa5b0b77b35ec7d8c2ff972586bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Download all the necessary inputs if they are not already present.
#
# Since most systems only have one input/connection into the network,
# downloading is essentially a serial (not parallel) operation. So the
# recipes in this Makefile all use a single file lock to have only one
# download script running at every instant.
#
# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This Makefile is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This Makefile is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this Makefile.  If not, see <http://www.gnu.org/licenses/>.





# Download input data
# --------------------
#
# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset
# properties. In most cases, you will not need to edit this rule (or
# file!). Simply follow the instructions of 'INPUTS.conf' and set the
# variable names according to the described standards.
#
# TECHNICAL NOTE on the '$(foreach ...)' loop of 'inputdatasets': we are
# using several (relatively complex!) features particular to Make: In GNU
# Make, '.VARIABLES' "... expands to a list of the names of all global
# variables defined so far" (from the "Other Special Variables" section of
# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only
# used for input files, we find all the variables that contain the input
# file name (the '%' is the filename). Finally, using the
# pattern-substitution function ('patsubst'), we remove the fixed string at
# the start and end of the variable name.
#
# Download lock file: Most systems have a single connection to the
# internet, therefore downloading is inherently done in series. As a
# result, when more than one dataset is necessary for download, if they are
# done in parallel, the speed will be slower than downloading them in
# series. We thus use the 'flock' program to tie/lock the downloading
# process with a file and make sure that only one downloading event is in
# progress at every moment.
#
# Directory under which the input-dataset targets of this Makefile are
# placed. It is only used as an order-only prerequisite, so its timestamp
# never triggers a re-download. '-p' is used so the rule also succeeds when
# a parent directory is missing.
$(indir):; mkdir -p $@
# Wrapper script called by the download recipe. The recipe gives it, in
# this order: the downloader command (as one quoted argument), the lock
# file, the URL and the output file name. Defined recursively ('='), so
# '$(bashdir)' is only expanded when this variable is used.
downloadwrapper = $(bashdir)/download-multi-try
# List of input dataset targets: for every global variable whose name
# matches 'INPUT-%-sha256', strip the fixed prefix and suffix from the name
# (leaving only the file name) and put that file name under '$(indir)'.
inputdatasets = $(foreach shavar, \
                  $(filter INPUT-%-sha256,$(.VARIABLES)), \
                  $(indir)/$(patsubst INPUT-%-sha256,%,$(shavar)))
$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)

#	NOTE(review): shell variables set on one line of this recipe are
#	read on later lines, and the 'if' blocks span several lines. So the
#	recipe needs to run in a single shell (presumably '.ONESHELL' is
#	set elsewhere in the project; confirm before re-using this rule).
#
#	Set the necessary parameters for this input file as shell variables
#	(to help in readability).
	url=$(INPUT-$*-url)
	sha=$(INPUT-$*-sha256)

#	Download (or make the link to) the input dataset. If the file
#	exists in 'INDIR', it may be a symbolic link to some other place in
#	the filesystem. To avoid too many links when using these files
#	during processing, we'll use 'readlink -f' so the link we make here
#	points to the final file directly (note that 'readlink' is part of
#	GNU Coreutils). If it's not a link, the 'readlink' part has no
#	effect.
#
#	The result is first written to a '.unchecked' name, so the final
#	target only exists after its checksum has been confirmed. The URL
#	is quoted so it always reaches the wrapper as exactly one argument
#	(even when it is empty or has characters special to the shell).
	unchecked=$@.unchecked
	if [ -f $(INDIR)/$* ]; then
	  ln -fs $$(readlink -f $(INDIR)/$*) "$$unchecked"
	else
	  touch $(lockdir)/download
	  $(downloadwrapper) "wget --no-use-server-timestamps -O" \
	                     $(lockdir)/download "$$url" "$$unchecked"
	fi

#	Check the checksum to see if this is the proper dataset. Both sides
#	of the comparison are quoted (with an 'x' prefix), so an empty
#	value is reported as a checksum mismatch instead of causing a
#	syntax error in '['.
	sum=$$(sha256sum "$$unchecked" | awk '{print $$1}')
	if [ x"$$sum" = x"$$sha" ]; then
	  mv "$$unchecked" $@
	  echo "Integrity confirmed, using $@ in this project."
	else
	  echo; echo;
	  echo "Wrong SHA256 checksum for input file '$*':"
	  echo "  File location: $$unchecked"; \
	  echo "  Expected SHA256 checksum:   $$sha"; \
	  echo "  Calculated SHA256 checksum: $$sum"; \
	  echo; exit 1
	fi





# Final TeX macro
# ---------------
#
# It is very important to mention the address where the data were
# downloaded in the final report.
$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir)

#	Write the LaTeX macro holding the URL of the 'wfpc2.fits' input.
#	'printf' is used instead of 'echo' because the behavior of 'echo'
#	on operands containing a backslash differs between shells (some
#	would turn the '\n' of '\newcommand' into a new-line).
	printf '%s\n' "\\newcommand{\\wfpctwourl}{$(INPUT-wfpc2.fits-url)}" > $@