aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make/download.mk
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2019-04-15 01:47:58 +0100
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2019-04-15 02:24:09 +0100
commit313b936b502d22b6a2ff43f560dee0bb51fd01d0 (patch)
tree70f884b91b393be4d3c6b7cfaeaf3412900bd16f /reproduce/analysis/make/download.mk
parent4722ea598edd6b630227404c48c1c09ac527e9b8 (diff)
New architecture to separate software-building and analysis steps
Until now, the software building and analysis steps of the pipeline were intertwined. However, these steps (of how to build a software, and how to use it) are logically completely independent. Therefore with this commit, the pipeline now has a new architecture (particularly in the `reproduce' directory) to emphasize this distinction: The `reproduce' directory now has the two `software' and `analysis' subdirectories and the respective parts of the previous architecture have been broken up between these two based on their function. There is also no more `src' directory. The `config' directory for software and analysis is now mixed with the language-specific directories. Also, some of the software versions were also updated after some checks with their webpages. This new architecture will allow much more focused work on each part of the pipeline (to install the software and to run them for an analysis).
Diffstat (limited to 'reproduce/analysis/make/download.mk')
-rw-r--r--reproduce/analysis/make/download.mk91
1 files changed, 91 insertions, 0 deletions
diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk
new file mode 100644
index 0000000..a721863
--- /dev/null
+++ b/reproduce/analysis/make/download.mk
@@ -0,0 +1,91 @@
+# Download all the necessary inputs if they are not already present.
+#
+# Since most systems only have one input/connection into the network,
+# downloading is essentially a serial (not parallel) operation. so the
+# recipes in this Makefile all use a single file lock to have one download
+# script running at every instant.
+#
+# Copyright (C) 2018-2019 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+#
+# This Makefile is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This Makefile is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details. See <http://www.gnu.org/licenses/>.
+
+
+
+
+
+# Download input data
+# --------------------
+#
+# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
+# this template we only have one dataset to enable easy processing, so all
+# the extra checks in this rule may seem redundant.
+#
+# In a real project, you will need more than one dataset. In that case,
+# just add them to the target list and add an `elif' statement to define it
+# in the recipe.
+#
+# Files in a server usually have very long names, which are mainly designed
+# for helping in data-base management and being generic. Since Make uses
+# file names to identify which rule to execute, and the scope of this
+# research project is much less than the generic survey/dataset, it is
+# easier to have a simple/short name for the input dataset and work with
+# that. In the first condition of the recipe below, we connect the short
+# name with the raw database name of the dataset.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the `flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+$(indir):; mkdir $@
+downloadwrapper = $(bashdir)/download-multi-try
+inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits)
+$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
+
+ # Set the necessary parameters for this input file.
+ if [ $* = wfpc2 ]; then
+ origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
+ else
+ echo; echo; echo "Not recognized input dataset: '$*.fits'."
+ echo; echo; exit 1
+ fi
+
+ # Download (or make the link to) the input dataset.
+ if [ -f $(INDIR)/$$origname ]; then
+ ln -s $(INDIR)/$$origname $@
+ else
+ touch $(lockdir)/download
+ $(downloadwrapper) "wget --no-use-server-timestamps -O" \
+ $(lockdir)/download $$url/$$origname $@
+ fi
+
+ # Check the md5 sum to see if this is the proper dataset.
+ sum=$$(md5sum $@ | awk '{print $$1}')
+ if [ $$sum != $$mdf ]; then
+ wrongname=$(dir $@)/wrong-$(notdir $@)
+ mv $@ $$wrongname
+ echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
+ echo; echo; exit 1
+ fi
+
+
+
+
+
+# Final TeX macro
+# ---------------
+#
+# It is very important to mention the address where the data were
+# downloaded in the final report.
+$(mtexdir)/download.tex: $(pconfdir)/INPUTS.mk | $(mtexdir)
+ echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@