author     Mohammad Akhlaghi <mohammad@akhlaghi.org>   2018-11-25 18:28:04 +0000
committer  Mohammad Akhlaghi <mohammad@akhlaghi.org>   2018-11-25 18:36:30 +0000
commit     a60db913794a7e0563a5c3443311a955a98559f5 (patch)
tree       4f02df6c0a78f7a0c0fc44a50584b18858a0e46f /reproduce
parent     46f639963cf98d3a380a2577eb0c8f138ea451d5 (diff)
More generic naming convention
Until now, we were keeping the input file within the reproduction pipeline's directories under the same name it has on the database/server. Now, we are using a short/summarized filename convention for the input dataset.
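As a rough sketch of the new convention (the WFPC2IMAGE value below is a placeholder; the real definition lives in `$(pconfdir)/INPUTS.mk', the full link-or-download logic is in the download.mk hunk further down, and recipe lines are TAB-indented in the real Makefile):

    # Hypothetical server-side name; the real value comes from INPUTS.mk.
    WFPC2IMAGE = some-very-long-archive-filename.fits

    # Short, pipeline-internal name that all later rules depend on. If a
    # local copy exists it is linked; otherwise download.mk fetches it.
    $(indir)/wfpc2.fits: | $(indir)
            ln -s $(INDIR)/$(WFPC2IMAGE) $@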
Diffstat (limited to 'reproduce')
-rw-r--r--   reproduce/config/pipeline/delete-me-wfpc2-quant.mk    2
-rw-r--r--   reproduce/config/pipeline/filters.mk                  25
-rw-r--r--   reproduce/src/make/delete-me.mk                       30
-rw-r--r--   reproduce/src/make/download.mk                        34
4 files changed, 33 insertions, 58 deletions
diff --git a/reproduce/config/pipeline/delete-me-wfpc2-quant.mk b/reproduce/config/pipeline/delete-me-wfpc2-quant.mk
deleted file mode 100644
index 2ff7456..0000000
--- a/reproduce/config/pipeline/delete-me-wfpc2-quant.mk
+++ /dev/null
@@ -1,2 +0,0 @@
-# Number of samples to create
-delete-me-wfpc2-quantile = 0.65
diff --git a/reproduce/config/pipeline/filters.mk b/reproduce/config/pipeline/filters.mk
deleted file mode 100644
index 6fa785d..0000000
--- a/reproduce/config/pipeline/filters.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-# `filters' are the possible different parts of the survey, for
-# example filters in broad or narrow-band astronomical imaging
-# datasets. Since a generic term for them (to apply other types of
-# surveys/datasets) hasn't been considered yet, we'll stick with the
-# `filters' name. But feel free to correct it (or propose a
-# suggestion).
-#
-# If your dataset only has a single filter, or this concept is not
-# defined for your type of input dataset, you can ignore this
-# variable.
-#
-# The values can be any string to identify different parts of a survey
-# separated by white space characters (for example `f125w f160w' or `J
-# H' if you want to specify two filters).
-#
-# To be clean and also help in readability of the pipeline, it is good
-# practice to define a separate `filter-XXXX' variable for each
-# survey/dataset, even if they have overlapping filters.
-#
-# These `filters' are used in the initial downloading of the data and
-# it is good practice (for avoiding bugs) to keep the same filter (and
-# survey) names in the filenames of the intermediate/output files
-# also. This will make sure that the raw input and intermediate/final
-# output are exactly related.
-filters-survey = a b c d e f g h i
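For context on the removed comments above, a hypothetical per-survey definition and its use (the `cosmos' name and filter values are placeholders, not part of this pipeline) would have looked like:

    filters-cosmos = f125w f160w
    cosmos-inputs  = $(foreach f, $(filters-cosmos), $(indir)/cosmos-$(f).fits)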
diff --git a/reproduce/src/make/delete-me.mk b/reproduce/src/make/delete-me.mk
index 9227fde..3f54947 100644
--- a/reproduce/src/make/delete-me.mk
+++ b/reproduce/src/make/delete-me.mk
@@ -47,10 +47,10 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir)
#
# For an example image, we'll make a PDF copy of the WFPC II image to
# display in the paper.
-wfpc2dir = $(texdir)/delete-me-wfpc2
-$(wfpc2dir): | $(texdir); mkdir $@
-wfpc2 = $(wfpc2dir)/wfpc2.pdf
-$(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+dddemodir = $(texdir)/delete-me-demo
+$(dddemodir): | $(texdir); mkdir $@
+demopdf = $(dddemodir)/wfpc2.pdf
+$(demopdf): $(dddemodir)/%.pdf: $(indir)/%.fits | $(dddemodir)
# When the plotted values are re-made, it is necessary to also
# delete the TikZ externalized files so the plot is also re-made.
@@ -67,8 +67,8 @@ $(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
# ------------------------
#
# For an example plot, we'll show the pixel value histogram also.
-wfpc2hist = $(wfpc2dir)/wfpc2-hist.txt
-$(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+histogram = $(dddemodir)/wfpc2-hist.txt
+$(histogram): $(dddemodir)/%-hist.txt: $(indir)/%.fits | $(dddemodir)
# When the plotted values are re-made, it is necessary to also
# delete the TikZ externalized files so the plot is also re-made.
@@ -86,11 +86,9 @@ $(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
#
# This is just a demonstration of how to get analysis configuration
# parameters from variables defined in `reproduce/config/pipeline'.
-wfpc2stats = $(wfpc2dir)/wfpc2-stats.txt
-$(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \
- | $(wfpc2dir)
- aststatistics $< -h0 --mean --median \
- --quantile=$(delete-me-wfpc2-quantile) > $@
+stats = $(dddemodir)/wfpc2-stats.txt
+$(stats): $(dddemodir)/%-stats.txt: $(indir)/%.fits | $(dddemodir)
+ aststatistics $< -h0 --mean --median > $@
@@ -103,7 +101,7 @@ $(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \
#
# NOTE: In LaTeX you cannot use any non-alphabetic character in a variable
# name.
-$(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats)
+$(mtexdir)/delete-me.tex: $(dm) $(demopdf) $(histogram) $(stats)
# Write the number of random values used.
echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@
@@ -125,11 +123,7 @@ $(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats)
echo "\newcommand{\deletememax}{$$v}" >> $@
# Write the statistics of the WFPC2 image as a macro.
- q=$(delete-me-wfpc2-quantile)
- echo "\newcommand{\deletemewfpcquantile}{$$q}" >> $@
- mean=$$(awk '{printf("%.2f", $$1)}' $(wfpc2stats))
+ mean=$$(awk '{printf("%.2f", $$1)}' $(stats))
echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@
- median=$$(awk '{printf("%.2f", $$2)}' $(wfpc2stats))
+ median=$$(awk '{printf("%.2f", $$2)}' $(stats))
echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@
- quantile=$$(awk '{printf("%.2f", $$3)}' $(wfpc2stats))
- echo "\newcommand{\deletemewfpctwoquantile}{$$quantile}" >> $@
diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index 180d2cf..332392b 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -30,12 +30,19 @@
#
# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
# this template pipeline we only have one dataset to enable easy
-# processing, so all the extra checks in this rule may seem
-# redundant.
+# processing, so all the extra checks in this rule may seem redundant.
#
-# However, in a real project, you will need more than one dataset. In that
-# case, just add them to the target list and add an `elif' statement to
-# define it in the recipe.
+# In a real project, you will need more than one dataset. In that case,
+# just add them to the target list and add an `elif' statement to define it
+# in the recipe.
+#
+# Files on a server usually have very long names, which are mainly designed
+# to help with database management and to be generic. Since Make uses file
+# names to identify which rule to execute, and the scope of this research
+# pipeline is much narrower than the generic survey/dataset, it is easier
+# to have a simple/short name for the input dataset and work with that. In
+# the first condition of the recipe below, we connect the short name with
+# the raw database name of the dataset.
#
# Download lock file: Most systems have a single connection to the
# internet, therefore downloading is inherently done in series. As a
@@ -45,21 +52,22 @@
# process with a file and make sure that only one downloading event is in
# progress at every moment.
$(indir):; mkdir $@
-inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i))
-$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits)
+$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
# Set the necessary parameters for this input file.
- if [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5);
+ if [ $* = wfpc2 ]; then
+ origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
else
- echo; echo; echo "Not recognized input dataset: '$*'."
+ echo; echo; echo "Not recognized input dataset: '$*.fits'."
echo; echo; exit 1
fi
# Download (or make the link to) the input dataset.
- if [ -f $(INDIR)/$* ]; then
- ln -s $(INDIR)/$* $@
+ if [ -f $(INDIR)/$$origname ]; then
+ ln -s $(INDIR)/$$origname $@
else
- flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$*
+ flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$$origname
fi
# Check the md5 sum to see if this is the proper dataset.
@@ -67,7 +75,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
if [ $$sum != $$mdf ]; then
wrongname=$(dir $@)/wrong-$(notdir $@)
mv $@ $$wrongname
- echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname"
+ echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
echo; echo; exit 1
fi
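As the new comments in this rule suggest, adding a second dataset only means extending the target list and adding an `elif' branch. A hypothetical sketch (the `acs' name and the ACS* variables are placeholders, not defined in this commit; the recipe style assumes the pipeline's usual single-shell recipes and TAB indentation):

    inputdatasets = $(foreach i, wfpc2 acs, $(indir)/$(i).fits)
    $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
            # Set the necessary parameters for this input file.
            if   [ $* = wfpc2 ]; then origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
            elif [ $* = acs   ]; then origname=$(ACSIMAGE);   url=$(ACSURL);   mdf=$(ACSMD5);
            else
              echo; echo; echo "Not recognized input dataset: '$*.fits'."; exit 1
            fi
            # ... the link/download and checksum steps then continue as in the rule above.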