From a60db913794a7e0563a5c3443311a955a98559f5 Mon Sep 17 00:00:00 2001
From: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Date: Sun, 25 Nov 2018 18:28:04 +0000
Subject: More generic naming convention

Until now, the input file was kept in the reproduction pipeline's
directories under the same name it has on the database/server. Now, a
short/summarized name is used for the input dataset within the pipeline, and
the download rule maps that short name to the original server-side name.
---
 reproduce/src/make/download.mk | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'reproduce/src/make/download.mk')

diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index 180d2cf..332392b 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -30,12 +30,19 @@
 #
 # The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
 # this template pipeline we only have one dataset to enable easy
-# processing, so all the extra checks in this rule may seem
-# redundant.
+# processing, so all the extra checks in this rule may seem redundant.
 #
-# However, in a real project, you will need more than one dataset. In that
-# case, just add them to the target list and add an `elif' statement to
-# define it in the recipe.
+# In a real project, you will need more than one dataset. In that case,
+# just add them to the target list and add an `elif' statement to define it
+# in the recipe.
+#
+# Files on a server usually have very long names, which are mainly designed
+# to help with database management and to be generic. Since Make uses
+# file names to identify which rule to execute, and the scope of this
+# research pipeline is much narrower than the generic survey/dataset, it is
+# easier to have a simple/short name for the input dataset and work with
+# that. In the first condition of the recipe below, we connect the short
+# name with the raw database name of the dataset.
 #
 # Download lock file: Most systems have a single connection to the
 # internet, therefore downloading is inherently done in series. As a
@@ -45,21 +52,22 @@
 # process with a file and make sure that only one downloading event is in
 # progress at every moment.
 $(indir):; mkdir $@
-inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i))
-$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits)
+$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
 
         # Set the necessary parameters for this input file.
-	if   [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5);
+	if   [ $* = wfpc2 ]; then
+	  origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
 	else
-	echo; echo; echo "Not recognized input dataset: '$*'."
+	echo; echo; echo "Not recognized input dataset: '$*.fits'."
 	echo; echo; exit 1
 	fi
 
         # Download (or make the link to) the input dataset.
-	if [ -f $(INDIR)/$* ]; then
-	  ln -s $(INDIR)/$* $@
+	if [ -f $(INDIR)/$$origname ]; then
+	  ln -s $(INDIR)/$$origname $@
 	else
-	  flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$*
+	  flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$$origname
 	fi
 
         # Check the md5 sum to see if this is the proper dataset.
@@ -67,7 +75,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
 	if [ $$sum != $$mdf ]; then
 	  wrongname=$(dir $@)/wrong-$(notdir $@)
 	  mv $@ $$wrongname
-	  echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname"
+	  echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
 	  echo; echo; exit 1
 	fi
 
-- 
cgit v1.2.1