From e623102768c426e86b0ed73904168006dfea2af9 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sun, 25 Nov 2018 15:22:48 +0000 Subject: Pipeline now downloads and uses an input dataset In most analysis situations (except for simulations), an input dataset is necessary, but that part of the pipeline was just left out and a general `SURVEY' variable was set and never used. So with this commit, we actually use a sample FITS file from the FITS standard webpage, show it (as well as its histogram) and do some basic calculations on it. This preparation of the input datasets is done in a generic way to enable easy addition of more datasets if necessary. --- reproduce/config/gnuastro/astconvertt.conf | 31 ++++++++++ reproduce/config/gnuastro/aststatistics.conf | 34 +++++++++++ reproduce/config/pipeline/INPUTS.mk | 9 +++ reproduce/config/pipeline/LOCAL.mk.in | 4 ++ reproduce/config/pipeline/delete-me-wfpc2-quant.mk | 2 + reproduce/config/pipeline/dependency-versions.mk | 1 + reproduce/config/pipeline/web.mk | 6 -- reproduce/src/make/delete-me.mk | 71 ++++++++++++++++++++-- reproduce/src/make/dependencies.mk | 7 ++- reproduce/src/make/download.mk | 57 +++++++++++++---- reproduce/src/make/initialize.mk | 9 +++ 11 files changed, 207 insertions(+), 24 deletions(-) create mode 100644 reproduce/config/gnuastro/astconvertt.conf create mode 100644 reproduce/config/gnuastro/aststatistics.conf create mode 100644 reproduce/config/pipeline/INPUTS.mk create mode 100644 reproduce/config/pipeline/delete-me-wfpc2-quant.mk delete mode 100644 reproduce/config/pipeline/web.mk (limited to 'reproduce') diff --git a/reproduce/config/gnuastro/astconvertt.conf b/reproduce/config/gnuastro/astconvertt.conf new file mode 100644 index 0000000..fc3ba04 --- /dev/null +++ b/reproduce/config/gnuastro/astconvertt.conf @@ -0,0 +1,31 @@ +# Default parameters (System) for ConvertType. +# ConvertType is part of GNU Astronomy Utilities. +# +# Use the long option name of each parameter followed by a value. 
The name +# and value should be separated by at least one white-space character (for +# example ` '[space], or tab). Lines starting with `#' are ignored. +# +# For more information, please run these commands: +# +# $ astconvertt --help # Full list of options, short doc. +# $ astconvertt -P # Print all options and used values. +# $ info astconvertt # All options and input/output. +# $ info gnuastro "Configuration files" # How to use configuration files. +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice and +# this notice are preserved. This file is offered as-is, without any +# warranty. + +# Input: + +# Output: + quality 100 + widthincm 10.0 + borderwidth 1 + output jpg + +# Flux: + invert 0 + +# Common options diff --git a/reproduce/config/gnuastro/aststatistics.conf b/reproduce/config/gnuastro/aststatistics.conf new file mode 100644 index 0000000..0bf3b83 --- /dev/null +++ b/reproduce/config/gnuastro/aststatistics.conf @@ -0,0 +1,34 @@ +# Default parameters (System) for Statistics. +# Statistics is part of GNU Astronomy Utilities. +# +# Use the long option name of each parameter followed by a value. The name +# and value should be separated by at least one white-space character (for +# example ` '[space], or tab). Lines starting with `#' are ignored. +# +# For more information, please run these commands: +# +# $ aststatistics --help # Full list of options, short doc. +# $ aststatistics -P # Print all options and used values. +# $ info aststatistics # All options and input/output. +# $ info gnuastro "Configuration files" # How to use configuration files. +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice and +# this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +# Input image: + +# Sky and its STD settings + khdu 1 + meanmedqdiff 0.005 + outliersigma 10 + outliersclip 3,0.2 + smoothwidth 3 + sclipparams 3,0.1 + +# Histogram and CFP settings + numasciibins 70 + asciiheight 10 + numbins 100 + mirrordist 1.5 diff --git a/reproduce/config/pipeline/INPUTS.mk b/reproduce/config/pipeline/INPUTS.mk new file mode 100644 index 0000000..3522ecc --- /dev/null +++ b/reproduce/config/pipeline/INPUTS.mk @@ -0,0 +1,9 @@ +# Input files necessary for this pipeline (name, MD5, size and host URL of each). +# +# This file is read by the configure script and running Makefiles. + + +WFPC2IMAGE = WFPC2ASSNu5780205bx.fits +WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 +WFPC2SIZE = 62kb +WFPC2URL = https://fits.gsfc.nasa.gov/samples diff --git a/reproduce/config/pipeline/LOCAL.mk.in b/reproduce/config/pipeline/LOCAL.mk.in index d6bf2c0..89e3e23 100644 --- a/reproduce/config/pipeline/LOCAL.mk.in +++ b/reproduce/config/pipeline/LOCAL.mk.in @@ -1,4 +1,8 @@ # Local pipeline configuration. +# +# This is just a template for the `./configure' script to fill in. Please +# don't make any change to this file. 
BDIR = @bdir@ +INDIR = @indir@ DEPENDENCIES-DIR = @ddir@ DOWNLOADER = @downloader@ diff --git a/reproduce/config/pipeline/delete-me-wfpc2-quant.mk b/reproduce/config/pipeline/delete-me-wfpc2-quant.mk new file mode 100644 index 0000000..2ff7456 --- /dev/null +++ b/reproduce/config/pipeline/delete-me-wfpc2-quant.mk @@ -0,0 +1,2 @@ +# Quantile of the WFPC2 image's pixel values to measure +delete-me-wfpc2-quantile = 0.65 diff --git a/reproduce/config/pipeline/dependency-versions.mk b/reproduce/config/pipeline/dependency-versions.mk index f85cdbf..dc45b81 100644 --- a/reproduce/config/pipeline/dependency-versions.mk +++ b/reproduce/config/pipeline/dependency-versions.mk @@ -5,6 +5,7 @@ bash-version = 4.4.18 bzip2-version = 1.0.6 cmake-version = 3.12.4 coreutils-version = 8.30 +flock-version = 0.2.3 gawk-version = 4.2.1 ghostscript-version = 9.26 git-version = 2.19.1 diff --git a/reproduce/config/pipeline/web.mk b/reproduce/config/pipeline/web.mk deleted file mode 100644 index 5af11a7..0000000 --- a/reproduce/config/pipeline/web.mk +++ /dev/null @@ -1,6 +0,0 @@ -# Web server(s) hosting the input data for this pipeline. -# -# This is the web page containing the files that must be located in the -# `SURVEY' directory of `reproduce/config/pipeline/LOCAL.mk' on the local -# system. -web-survey = https://some.webpage.com/example/server diff --git a/reproduce/src/make/delete-me.mk b/reproduce/src/make/delete-me.mk index 67f0440..9227fde 100644 --- a/reproduce/src/make/delete-me.mk +++ b/reproduce/src/make/delete-me.mk @@ -25,8 +25,7 @@ # Dummy dataset # ------------- # -# We will use AWK's random number generator to generate a random dataset to -# be imported by PGFPlots for a plot in the paper. +# We will use AWK to generate a table showing X and X^2 and draw its plot. 
dmdir = $(texdir)/delete-me dm = $(dmdir)/data.txt $(dmdir): | $(texdir); mkdir $@ @@ -43,6 +42,60 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir) +# WFPC2 image PDF +# ----------------- +# +# For an example image, we'll make a PDF copy of the WFPC II image to +# display in the paper. +wfpc2dir = $(texdir)/delete-me-wfpc2 +$(wfpc2dir): | $(texdir); mkdir $@ +wfpc2 = $(wfpc2dir)/wfpc2.pdf +$(wfpc2): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir) + + # When the plotted values are re-made, it is necessary to also + # delete the TikZ externalized files so the plot is also re-made. + rm -f $(tikzdir)/delete-me-wfpc2.pdf + + # Convert the dataset to a PDF. + astconvertt --fluxhigh=4 $< -h0 -o$@ + + + + + +# Histogram of WFPC2 image +# ------------------------ +# +# For an example plot, we'll show the pixel value histogram also. +wfpc2hist = $(wfpc2dir)/wfpc2-hist.txt +$(wfpc2hist): $(indir)/$(WFPC2IMAGE) | $(wfpc2dir) + + # When the plotted values are re-made, it is necessary to also + # delete the TikZ externalized files so the plot is also re-made. + rm -f $(tikzdir)/delete-me-wfpc2.pdf + + # Generate the pixel value distribution + aststatistics --lessthan=5 $< -h0 --histogram -o$@ + + + + + +# Basic statistics +# ---------------- +# +# This is just a demonstration of how to get analysis configuration +# parameters from variables defined in `reproduce/config/pipeline'. +wfpc2stats = $(wfpc2dir)/wfpc2-stats.txt +$(wfpc2stats): $(indir)/$(WFPC2IMAGE) $(pconfdir)/delete-me-wfpc2-quant.mk \ + | $(wfpc2dir) + aststatistics $< -h0 --mean --median \ + --quantile=$(delete-me-wfpc2-quantile) > $@ + + + + + # TeX macros # ---------- # @@ -50,7 +103,7 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir) # # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable # name. -$(mtexdir)/delete-me.tex: $(dm) +$(mtexdir)/delete-me.tex: $(dm) $(wfpc2) $(wfpc2hist) $(wfpc2stats) # Write the number of random values used. 
echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@ @@ -67,6 +120,16 @@ $(mtexdir)/delete-me.tex: $(dm) {if($$2>max) max=$$2; if($$2> $@; + echo "\newcommand{\deletememin}{$$v}" >> $@ v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); echo "\newcommand{\deletememax}{$$v}" >> $@ + + # Write the statistics of the WFPC2 image as a macro. + q=$(delete-me-wfpc2-quantile) + echo "\newcommand{\deletemewfpcquantile}{$$q}" >> $@ + mean=$$(awk '{printf("%.2f", $$1)}' $(wfpc2stats)) + echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@ + median=$$(awk '{printf("%.2f", $$2)}' $(wfpc2stats)) + echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@ + quantile=$$(awk '{printf("%.2f", $$3)}' $(wfpc2stats)) + echo "\newcommand{\deletemewfpctwoquantile}{$$quantile}" >> $@ diff --git a/reproduce/src/make/dependencies.mk b/reproduce/src/make/dependencies.mk index 8ed359b..a784883 100644 --- a/reproduce/src/make/dependencies.mk +++ b/reproduce/src/make/dependencies.mk @@ -43,7 +43,7 @@ ildir = $(BDIR)/dependencies/installed/lib ilidir = $(BDIR)/dependencies/installed/lib/built # Define the top-level programs to build (installed in `.local/bin'). 
-top-level-programs = gawk gs grep sed git astnoisechisel texlive-ready +top-level-programs = gawk gs grep sed git flock astnoisechisel texlive-ready all: $(foreach p, $(top-level-programs), $(ibdir)/$(p)) # Other basic environment settings: We are only including the host @@ -75,6 +75,7 @@ LD_LIBRARY_PATH := $(ildir) tarballs = $(foreach t, cfitsio-$(cfitsio-version).tar.gz \ cmake-$(cmake-version).tar.gz \ curl-$(curl-version).tar.gz \ + flock-$(flock-version).tar.xz \ gawk-$(gawk-version).tar.lz \ ghostscript-$(ghostscript-version).tar.gz \ git-$(git-version).tar.xz \ @@ -111,6 +112,7 @@ $(tarballs): $(tdir)/%: w=https://heasarc.gsfc.nasa.gov/FTP/software/fitsio/c/cfitsio$$v.tar.gz elif [ $$n = cmake ]; then w=https://cmake.org/files/v3.12 elif [ $$n = curl ]; then w=https://curl.haxx.se/download + elif [ $$n = flock ]; then w=https://github.com/discoteq/flock/releases/download/v$(flock-version) elif [ $$n = gawk ]; then w=http://ftp.gnu.org/gnu/gawk elif [ $$n = ghostscript ]; then w=https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs926 elif [ $$n = git ]; then w=https://mirrors.edge.kernel.org/pub/software/scm/git @@ -244,6 +246,9 @@ $(ibdir)/libtool: $(tdir)/libtool-$(libtool-version).tar.xz $(ibdir)/gs: $(tdir)/ghostscript-$(ghostscript-version).tar.gz $(call gbuild, $<, ghostscript-$(ghostscript-version)) +$(ibdir)/flock: $(tdir)/flock-$(flock-version).tar.xz + $(call gbuild, $<, flock-$(flock-version), static) + $(ibdir)/git: $(tdir)/git-$(git-version).tar.xz \ $(ilidir)/zlib $(call gbuild, $<, git-$(git-version), static) diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk index 9617a45..180d2cf 100644 --- a/reproduce/src/make/download.mk +++ b/reproduce/src/make/download.mk @@ -25,20 +25,51 @@ -# Download SURVEY data +# Download input data # -------------------- # -# Data from a survey (for example an imaging survey) usually have a special -# file-name format which should be set here in the `foreach' loop. 
Note -# that the `foreach' function needs the backslash (`\') at the end of the -# line when it is broken into multiple lines. -all-survey = $(foreach f, $(filters-survey), \ - $(SURVEY)/a-special-format-$(f).fits \ - $(SURVEY)/a-possibly-additional-$(f)-format.fits ) -$(SURVEY):; mkdir $@ -$(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir) - flock $(lockdir)/download -c "$(DOWNLOADER) $@ $(web-survey)/$*" +# The input dataset properties are defined in `$(pconfdir)/INPUTS.mk'. For +# this template pipeline we only have one dataset to enable easy +# processing, so all the extra checks in this rule may seem +# redundant. +# +# However, in a real project, you will need more than one dataset. In that +# case, just add them to the target list and add an `elif' statement to +# define it in the recipe. +# +# Download lock file: Most systems have a single connection to the +# internet, therefore downloading is inherently done in series. As a +# result, when more than one dataset is necessary for download, if they are +# done in parallel, the speed will be slower than downloading them in +# series. We thus use the `flock' program to tie/lock the downloading +# process with a file and make sure that only one downloading event is in +# progress at every moment. +$(indir):; mkdir $@ +inputdatasets = $(foreach i, $(WFPC2IMAGE), $(indir)/$(i)) +$(inputdatasets): $(indir)/%: | $(indir) $(lockdir) + + # Set the necessary parameters for this input file. + if [ $* = $(WFPC2IMAGE) ]; then url=$(WFPC2URL); mdf=$(WFPC2MD5); + else + echo; echo; echo "Not recognized input dataset: '$*'." + echo; echo; exit 1 + fi + + # Download (or make the link to) the input dataset. + if [ -f $(INDIR)/$* ]; then + ln -s $(INDIR)/$* $@ + else + flock $(lockdir)/download $(DOWNLOADER) $@ $$url/$* + fi + # Check the md5 sum (quoted: an empty sum must also fail the check).
+ sum=$$(md5sum $@ | awk '{print $$1}') + if [ "$$sum" != "$$mdf" ]; then + wrongname=$(@D)/wrong-$(@F) + mv $@ $$wrongname + echo; echo; echo "Wrong MD5 checksum for '$*' in $$wrongname" + echo; echo; exit 1 + fi @@ -49,5 +80,5 @@ $(all-survey): $(SURVEY)/%: | $(SURVEY) $(lockdir) # # It is very important to mention the address where the data were # downloaded in the final report. -$(mtexdir)/download.tex: $(pconfdir)/web.mk | $(mtexdir) - @echo "\\newcommand{\\websurvey}{$(web-survey)}" > $@ +$(mtexdir)/download.tex: $(pconfdir)/INPUTS.mk | $(mtexdir) + echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@ diff --git a/reproduce/src/make/initialize.mk b/reproduce/src/make/initialize.mk index 694aca0..41a5e05 100644 --- a/reproduce/src/make/initialize.mk +++ b/reproduce/src/make/initialize.mk @@ -34,6 +34,7 @@ # parallel. Also, some programs may not be thread-safe, therefore it will # be necessary to put a lock on them. This pipeline uses the `flock' # program to achieve this. +indir = $(BDIR)/inputs texdir = $(BDIR)/tex srcdir = reproduce/src lockdir = $(BDIR)/locks @@ -224,6 +225,14 @@ $(mtexdir)/initialize.tex: | $(mtexdir) fi; \ echo "\newcommand{\\bziptwoversion}{$(bzip2-version)}" >> $@ + # Unfortunately we couldn't find a way to retrieve the version of + # the discoteq `flock' that we are using here. So we'll just report + # the version we downloaded and installed. + echo "\newcommand{\\flockversion}{$(flock-version)}" >> $@ + + + + # Versions of libraries. $(call lvcheck, fitsio.h, $(cfitsio-version), CFITSIO, cfitsioversion) -- cgit v1.2.1