aboutsummaryrefslogtreecommitdiff
path: root/reproduce/src
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/src')
-rw-r--r-- reproduce/src/make/delete-me.mk 71
-rw-r--r-- reproduce/src/make/dependencies.mk 7
-rw-r--r-- reproduce/src/make/download.mk 57
-rw-r--r-- reproduce/src/make/initialize.mk 9
4 files changed, 126 insertions, 18 deletions
diff --git a/reproduce/src/make/delete-me.mk b/reproduce/src/make/delete-me.mk
index 67f0440..9227fde 100644
--- a/reproduce/src/make/delete-me.mk
+++ b/reproduce/src/make/delete-me.mk
@@ -25,8 +25,7 @@
# Dummy dataset
# -------------
#
-# We will use AWK's random number generator to generate a random dataset to
-# be imported by PGFPlots for a plot in the paper.
+# We will use AWK to generate a table showing X and X^2 and draw its plot.
dmdir = $(texdir)/delete-me
dm = $(dmdir)/data.txt
$(dmdir): | $(texdir); mkdir $@
@@ -43,6 +42,60 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir)
+# WFPC2 image PDF
+# ---------------
+#
+# For an example image, we'll make a PDF copy of the WFPC2 image to
+# display in the paper.
+wfpc2dir = $(texdir)/delete-me-wfpc2
+$(wfpc2dir): | $(texdir); mkdir $@
+wfpc2 = $(wfpc2dir)/wfpc2.pdf
+$(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+
+ # When the image is re-made, it is necessary to also delete the
+ # TiKZ externalized file so the figure using it is also re-made.
+ rm -f $(tikzdir)/delete-me-wfpc2.pdf
+
+ # Convert the input image (`$<') into the PDF target (`$@').
+ astconvertt --fluxhigh=4 $< -h0 -o$@
+
+
+
+
+
+# Histogram of WFPC2 image
+# ------------------------
+#
+# For an example plot, we'll also show the pixel value histogram.
+wfpc2hist = $(wfpc2dir)/wfpc2-hist.txt
+$(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir)
+
+ # When the plotted values are re-made, it is necessary to also
+ # delete the TiKZ externalized file so the plot is also re-made.
+ rm -f $(tikzdir)/delete-me-wfpc2.pdf
+
+ # Generate the pixel value distribution (histogram) of the input.
+ aststatistics --lessthan=5 $< -h0 --histogram -o$@
+
+
+
+
+
+# Basic statistics
+# ----------------
+#
+# This is just a demonstration of how to get analysis configuration
+# parameters from variables defined in `reproduce/config/pipeline'.
+wfpc2stats = $(wfpc2dir)/wfpc2-stats.txt
+$(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \
+ | $(wfpc2dir)
+ aststatistics $< -h0 --mean --median \
+ --quantile=$(delete-me-wfpc2-quantile) > $@
+
+
+
+
+
# TeX macros
# ----------
#
@@ -50,7 +103,7 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir)
#
# NOTE: In LaTeX you cannot use any non-alphabetic character in a variable
# name.
-$(mtexdir)/delete-me.tex: $(dm)
+$(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats)
# Write the number of random values used.
echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@
@@ -67,6 +120,16 @@ $(mtexdir)/delete-me.tex: $(dm)
{if($$2>max) max=$$2; if($$2<min) min=$$2;}
END{print min, max}' $(dm));
v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}');
- echo "\newcommand{\deletememin}{$$v}" >> $@;
+ echo "\newcommand{\deletememin}{$$v}" >> $@
v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}');
echo "\newcommand{\deletememax}{$$v}" >> $@
+
+ # Write the statistics of the WFPC2 image as a macro.
+ q=$(delete-me-wfpc2-quantile)
+ echo "\newcommand{\deletemewfpcquantile}{$$q}" >> $@
+ mean=$$(awk '{printf("%.2f", $$1)}' $(wfpc2stats))
+ echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@
+ median=$$(awk '{printf("%.2f", $$2)}' $(wfpc2stats))
+ echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@
+ quantile=$$(awk '{printf("%.2f", $$3)}' $(wfpc2stats))
+ echo "\newcommand{\deletemewfpctwoquantile}{$$quantile}" >> $@
diff --git a/reproduce/src/make/dependencies.mk b/reproduce/src/make/dependencies.mk
index 8ed359b..a784883 100644
--- a/reproduce/src/make/dependencies.mk
+++ b/reproduce/src/make/dependencies.mk
@@ -43,7 +43,7 @@ ildir = $(BDIR)/dependencies/installed/lib
ilidir = $(BDIR)/dependencies/installed/lib/built
# Define the top-level programs to build (installed in `.local/bin').
-top-level-programs = gawk gs grep sed git astnoisechisel texlive-ready
+top-level-programs = gawk gs grep sed git flock astnoisechisel texlive-ready
all: $(foreach p, $(top-level-programs), $(ibdir)/$(p))
# Other basic environment settings: We are only including the host
@@ -75,6 +75,7 @@ LD_LIBRARY_PATH := $(ildir)
tarballs = $(foreach t, cfitsio-$(cfitsio-version).tar.gz \
cmake-$(cmake-version).tar.gz \
curl-$(curl-version).tar.gz \
+ flock-$(flock-version).tar.xz \
gawk-$(gawk-version).tar.lz \
ghostscript-$(ghostscript-version).tar.gz \
git-$(git-version).tar.xz \
@@ -111,6 +112,7 @@ $(tarballs): $(tdir)/%:
w=https://heasarc.gsfc.nasa.gov/FTP/software/fitsio/c/cfitsio$$v.tar.gz
elif [ $$n = cmake ]; then w=https://cmake.org/files/v3.12
elif [ $$n = curl ]; then w=https://curl.haxx.se/download
+ elif [ $$n = flock ]; then w=https://github.com/discoteq/flock/releases/download/v$(flock-version)
elif [ $$n = gawk ]; then w=http://ftp.gnu.org/gnu/gawk
elif [ $$n = ghostscript ]; then w=https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs926
elif [ $$n = git ]; then w=https://mirrors.edge.kernel.org/pub/software/scm/git
@@ -244,6 +246,9 @@ $(ibdir)/libtool: $(tdir)/libtool-$(libtool-version).tar.xz
$(ibdir)/gs: $(tdir)/ghostscript-$(ghostscript-version).tar.gz
$(call gbuild, $<, ghostscript-$(ghostscript-version))
+# Build `flock' from its tarball with `gbuild' (defined outside this hunk);
+# NOTE(review): `static' is presumably a static-linking request -- confirm.
+$(ibdir)/flock: $(tdir)/flock-$(flock-version).tar.xz
+ $(call gbuild, $<, flock-$(flock-version), static)
+
$(ibdir)/git: $(tdir)/git-$(git-version).tar.xz \
$(ilidir)/zlib
$(call gbuild, $<, git-$(git-version), static)
diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index 9617a45..180d2cf 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -25,20 +25,51 @@
-# Download SURVEY data
+# Download input data
# --------------------
#
-# Data from a survey (for example an imaging survey) usually have a special
-# file-name format which should be set here in the `foreach' loop. Note
-# that the `foreach' function needs the backslash (`\') at the end of the
-# line when it is broken into multiple lines.
-all-survey = $(foreach f, $(filters-survey), \
- $(SURVEY)/a-special-format-$(f).fits \
- $(SURVEY)/a-possibly-additional-$(f)-format.fits )
-$(SURVEY):; mkdir $@
-$(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir)
- flock $(lockdir)/download -c "$(DOWNLOADER) $@ $(web-survey)/$*"
+# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For
+# this template pipeline we only have one dataset to enable easy
+# processing, so all the extra checks in this rule may seem
+# redundant.
+#
+# However, in a real project, you will need more than one dataset. In that
+# case, just add them to the target list and add an `elif' statement to
+# define it in the recipe.
+#
+# Download lock file: Most systems have a single connection to the
+# internet, therefore downloading is inherently done in series. As a
+# result, when more than one dataset is necessary for download, if they are
+# done in parallel, the speed will be slower than downloading them in
+# series. We thus use the `flock' program to tie/lock the downloading
+# process with a file and make sure that only one downloading event is in
+# progress at every moment.
+$(indir):; mkdir $@
+inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i))
+$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)
+
+ # NOTE(review): this recipe is one multi-line shell script (shell
+ # variables and `if' blocks span lines), so it presumably relies on
+ # `.ONESHELL:' being set elsewhere in the pipeline -- confirm.
+
+ # Set the necessary parameters (URL and expected MD5 checksum) for
+ # this input file; `$*' is the file name without `$(indir)/'.
+ if [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5);
+ else
+ echo; echo; echo "Not recognized input dataset: '$*'."
+ echo; echo; exit 1
+ fi
+
+ # Download (or make the link to) the input dataset: when a copy is
+ # already in `$(INDIR)', link to it, otherwise download it while
+ # holding the download lock.
+ if [ -f $(INDIR)/$* ]; then
+ ln -s $(INDIR)/$* $@
+ else
+ flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$*
+ fi
+
+ # Check the MD5 checksum to see if this is the proper dataset. Both
+ # sides are quoted so that an empty checksum (for example when the
+ # download failed and `md5sum' printed nothing) is treated as a
+ # mismatch instead of a `[' syntax error that skips this block.
+ sum=$$(md5sum $@ | awk '{print $$1}')
+ if [ "$$sum" != "$$mdf" ]; then
+ # `$(dir ...)' already ends with a slash, so none is added here.
+ wrongname=$(dir $@)wrong-$(notdir $@)
+ mv $@ $$wrongname
+ echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname"
+ echo; echo; exit 1
+ fi
@@ -49,5 +80,5 @@ $(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir)
#
# It is very important to mention the address where the data were
# downloaded in the final report.
-$(mtexdir)/download.tex: $(pconfdir)/web.mk | $(mtexdir)
- @echo "\\newcommand{\\websurvey}{$(web-survey)}" > $@
+$(mtexdir)/download.tex: $(pconfdir)/INPUTS.mk | $(mtexdir)
+ echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@
diff --git a/reproduce/src/make/initialize.mk b/reproduce/src/make/initialize.mk
index 694aca0..41a5e05 100644
--- a/reproduce/src/make/initialize.mk
+++ b/reproduce/src/make/initialize.mk
@@ -34,6 +34,7 @@
# parallel. Also, some programs may not be thread-safe, therefore it will
# be necessary to put a lock on them. This pipeline uses the `flock'
# program to achieve this.
+indir = $(BDIR)/inputs
texdir = $(BDIR)/tex
srcdir = reproduce/src
lockdir = $(BDIR)/locks
@@ -224,6 +225,14 @@ $(mtexdir)/initialize.tex: | $(mtexdir)
fi; \
echo "\newcommand{\\bziptwoversion}{$(bzip2-version)}" >> $@
+ # Unfortunately we couldn't find a way to retrieve the version of
+ # the discoteq `flock' that we are using here. So we'll just report
+ # the version we downloaded and installed.
+ echo "\newcommand{\\flockversion}{$(flock-version)}" >> $@
+
+
+
+
# Versions of libraries.
$(call lvcheck, fitsio.h, $(cfitsio-version), CFITSIO, cfitsioversion)