From a60db913794a7e0563a5c3443311a955a98559f5 Mon Sep 17 00:00:00 2001
From: Mohammad Akhlaghi
Date: Sun, 25 Nov 2018 18:28:04 +0000
Subject: More generic naming convention

Until now, we were keeping the input file within the reproduction
pipeline's directories using the same name as the database/server. Now, we
are using a short/summarized filename convention for the input dataset.
---
 reproduce/config/pipeline/delete-me-wfpc2-quant.mk |  2 --
 reproduce/config/pipeline/filters.mk               | 25 ----------------
 reproduce/src/make/delete-me.mk                    | 30 ++++++++-----------
 reproduce/src/make/download.mk                     | 34 +++++++++++++---------
 4 files changed, 33 insertions(+), 58 deletions(-)
 delete mode 100644 reproduce/config/pipeline/delete-me-wfpc2-quant.mk
 delete mode 100644 reproduce/config/pipeline/filters.mk

diff --git a/reproduce/config/pipeline/delete-me-wfpc2-quant.mk b/reproduce/config/pipeline/delete-me-wfpc2-quant.mk
deleted file mode 100644
index 2ff7456..0000000
--- a/reproduce/config/pipeline/delete-me-wfpc2-quant.mk
+++ /dev/null
@@ -1,2 +0,0 @@
-# Number of samples to create
-delete-me-wfpc2-quantile = 0.65
diff --git a/reproduce/config/pipeline/filters.mk b/reproduce/config/pipeline/filters.mk
deleted file mode 100644
index 6fa785d..0000000
--- a/reproduce/config/pipeline/filters.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-# `filters' are the possible different parts of the survey, for
-# example filters in broad or narrow-band astronomical imaging
-# datasets. Since a generic term for them (to apply other types of
-# surveys/datasets) hasn't been considered yet, we'll stick with the
-# `filters' name. But feel free to correct it (or propose a
-# suggestion).
-#
-# If your dataset only has a single filter, or this concept is not
-# defined for your type of input dataset, you can ignore this
-# variable.
-#
-# The values can be any string to identify different parts of a survey
-# separated by white space characters (for example `f125w f160w' or `J
-# H' if you want to specify two filters).
-#
-# To be clean and also help in readability of the pipeline, it is good
-# practice to define a separate `filter-XXXX' variable for each
-# survey/dataset, even if they have overlapping filters.
-#
-# These `filters' are used in the initial downloading of the data and
-# it is good practice (for avoiding bugs) to keep the same filter (and
-# survey) names in the filenames of the intermediate/output files
-# also. This will make sure that the raw input and intermediate/final
-# output are exactly related.
-filters-survey = a b c d e f g h i
diff --git a/reproduce/src/make/delete-me.mk b/reproduce/src/make/delete-me.mk
index 9227fde..3f54947 100644
--- a/reproduce/src/make/delete-me.mk
+++ b/reproduce/src/make/delete-me.mk
@@ -47,10 +47,10 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir)
 #
 # For an example image, we'll make a PDF copy of the WFPC II image to
 # display in the paper.
-wfpc2dir = $(texdir)/delete-me-wfpc2
-$(wfpc2dir): | $(texdir); mkdir $@
-wfpc2 = $(wfpc2dir)/wfpc2.pdf
-$(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+dddemodir = $(texdir)/delete-me-demo
+$(dddemodir): | $(texdir); mkdir $@
+demopdf = $(dddemodir)/wfpc2.pdf
+$(demopdf): $(dddemodir)/%.pdf: $(indir)/%.fits | $(dddemodir)

 	# When the plotted values are re-made, it is necessary to also
 	# delete the TiKZ externalized files so the plot is also re-made.
@@ -67,8 +67,8 @@ $(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
 # ------------------------
 #
 # For an example plot, we'll show the pixel value histogram also.
-wfpc2hist = $(wfpc2dir)/wfpc2-hist.txt
-$(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+histogram = $(dddemodir)/wfpc2-hist.txt
+$(histogram): $(dddemodir)/%-hist.txt: $(indir)/%.fits | $(dddemodir)

 	# When the plotted values are re-made, it is necessary to also
 	# delete the TiKZ externalized files so the plot is also re-made.
@@ -86,11 +86,9 @@ $(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
 #
 # This is just as a demonstration on how to get analysic configuration
 # parameters from variables defined in `reproduce/config/pipeline'.
-wfpc2stats = $(wfpc2dir)/wfpc2-stats.txt
-$(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \
-              | $(wfpc2dir)
-	aststatistics $< -h0 --mean --median \
-	              --quantile=$(delete-me-wfpc2-quantile) > $@
+stats = $(dddemodir)/wfpc2-stats.txt
+$(stats): $(dddemodir)/%-stats.txt: $(indir)/%.fits | $(dddemodir)
+	aststatistics $< -h0 --mean --median > $@



@@ -103,7 +101,7 @@ $(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \
 #
 # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable
 # name.
-$(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats)
+$(mtexdir)/delete-me.tex: $(dm) $(demopdf) $(histogram) $(stats)

 	# Write the number of random values used.
 	echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@
@@ -125,11 +123,7 @@ $(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats)
 	echo "\newcommand{\deletememax}{$$v}" >> $@

 	# Write the statistics of the WFPC2 image as a macro.
-	q=$(delete-me-wfpc2-quantile)
-	echo "\newcommand{\deletemewfpcquantile}{$$q}" >> $@
-	mean=$$(awk '{printf("%.2f", $$1)}' $(wfpc2stats))
+	mean=$$(awk '{printf("%.2f", $$1)}' $(stats))
 	echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@
-	median=$$(awk '{printf("%.2f", $$2)}' $(wfpc2stats))
+	median=$$(awk '{printf("%.2f", $$2)}' $(stats))
 	echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@
-	quantile=$$(awk '{printf("%.2f", $$3)}' $(wfpc2stats))
-	echo "\newcommand{\deletemewfpctwoquantile}{$$quantile}" >> $@
diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index 180d2cf..332392b 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -30,12 +30,19 @@
 #
 # The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
 # this template pipeline we only have one dataset to enable easy
-# processing, so all the extra checks in this rule may seem
-# redundant.
+# processing, so all the extra checks in this rule may seem redundant.
 #
-# However, in a real project, you will need more than one dataset. In that
-# case, just add them to the target list and add an `elif' statement to
-# define it in the recipe.
+# In a real project, you will need more than one dataset. In that case,
+# just add them to the target list and add an `elif' statement to define it
+# in the recipe.
+#
+# Files in a server usually have very long names, which are mainly designed
+# for helping in data-base management and being generic. Since Make uses
+# file names to identify which rule to execute, and the scope of this
+# research pipeline is much less than the generic survey/dataset, it is
+# easier to have a simple/short name for the input dataset and work with
+# that. In the first condition of the recipe below, we connect the short
+# name with the raw database name of the dataset.
 #
 # Download lock file: Most systems have a single connection to the
 # internet, therefore downloading is inherently done in series. As a
@@ -45,21 +52,22 @@
 # process with a file and make sure that only one downloading event is in
 # progress at every moment.
 $(indir):; mkdir $@
-inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i))
-$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits)
+$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)

 	# Set the necessary parameters for this input file.
-	if [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5);
+	if [ $* = wfpc2 ]; then
+	  origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
 	else
-	  echo; echo; echo "Not recognized input dataset: '$*'."
+	  echo; echo; echo "Not recognized input dataset: '$*.fits'."
 	  echo; echo; exit 1
 	fi

 	# Download (or make the link to) the input dataset.
-	if [ -f $(INDIR)/$* ]; then
-	  ln -s $(INDIR)/$* $@
+	if [ -f $(INDIR)/$$origname ]; then
+	  ln -s $(INDIR)/$$origname $@
 	else
-	  flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$*
+	  flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$$origname
 	fi

 	# Check the md5 sum to see if this is the proper dataset.
@@ -67,7 +75,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
 	if [ $$sum != $$mdf ]; then
 	  wrongname=$(dir $@)/wrong-$(notdir $@)
 	  mv $@ $$wrongname
-	  echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname"
+	  echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
 	  echo; echo; exit 1
 	fi

--
cgit v1.2.1
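
Note on extending this rule: the comment added to download.mk above says that
extra datasets are handled by adding them to the target list and adding an
`elif' branch in the recipe. The sketch below shows one way that could look
under the new short-name convention. It is only an illustration: the `acs'
short name and the ACSIMAGE/ACSURL/ACSMD5 variables are hypothetical
placeholders (they are not part of this commit) and would, in practice, be
defined in `$(pconfdir)/INPUTS.mk' alongside the WFPC2 variables.

    # Hypothetical extension: a second input dataset with short name `acs'.
    inputdatasets = $(foreach i, wfpc2 acs, $(indir)/$(i).fits)
    $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)

    	# Connect each short name with its original database file name,
    	# URL and MD5 checksum (the ACS* variables are assumed here).
    	if   [ $* = wfpc2 ]; then
    	  origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5);
    	elif [ $* = acs ]; then
    	  origname=$(ACSIMAGE); url=$(ACSURL); mdf=$(ACSMD5);
    	else
    	  echo; echo; echo "Not recognized input dataset: '$*.fits'."
    	  echo; echo; exit 1
    	fi

The short names keep the static pattern rule (`$(indir)/%.fits') readable,
while `origname' carries the server's long file name only where it is
actually needed: the link/download step and the checksum message.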