aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/make
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/analysis/make')
-rw-r--r--reproduce/analysis/make/delete-me.mk98
-rw-r--r--reproduce/analysis/make/initialize.mk135
-rw-r--r--reproduce/analysis/make/verify.mk49
3 files changed, 216 insertions, 66 deletions
diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk
index fa16102..f45f9ea 100644
--- a/reproduce/analysis/make/delete-me.mk
+++ b/reproduce/analysis/make/delete-me.mk
@@ -22,18 +22,40 @@
# Dummy dataset
# -------------
#
-# We will use AWK to generate a table showing X and X^2 and draw its plot.
-delete-numdir = $(texdir)/delete-me-num
-delete-num = $(delete-numdir)/data.txt
-$(delete-numdir): | $(texdir); mkdir $@
-$(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir)
+# Just as a demonstration(!): we will use AWK to generate a table showing X
+# and X^2 and draw its plot.
+#
+# Note that this dataset is directly read by LaTeX to generate a plot, so
+# we need to put it in the $(tex-publish-dir) directory.
+dm-squared = $(tex-publish-dir)/squared.txt
+$(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir)
# When the plotted values are re-made, it is necessary to also
- # delete the TiKZ externalized files so the plot is also re-made.
- rm -f $(tikzdir)/delete-me.pdf
+ # delete the TiKZ externalized files so the plot is also re-made by
+ # PGFPlots.
+ rm -f $(tikzdir)/delete-me-squared.pdf
+
+ # Write the column metadata in a temporary file name (appending
+ # '.tmp' to the actual target name). Once all steps are done, it is
+ # renamed to the final target. We do this because if there is an
+ # error in the middle, Make will not consider the job to be
+ # complete and will stop here.
+ echo "# Data for demonstration plot of default Maneage (MANaging data linEAGE)." > $@.tmp
+ echo "# It is a simple plot, showing the power of two: y=x^2! " >> $@.tmp
+ echo "# " >> $@.tmp
+ echo "# Column 1: X [arbitrary, f32] The horizontal axis numbers." \
+ >> $@.tmp
+ echo "# Column 2: X_POW2 [arbitrary, f32] The horizontal axis to the power of two." \
+ >> $@.tmp
+ echo "# " >> $@.tmp
+ $(call print-copyright, $@.tmp)
# Generate the table of X and X^2 values.
- awk 'BEGIN {for(i=1;i<=$(delete-me-num);i+=0.5) print i, i*i; }' > $@
+ awk 'BEGIN {for(i=1;i<=$(delete-me-squared-num);i+=0.5) \
+ printf("%-8.1f%.2f\n", i, i*i); }' >> $@.tmp
+
+ # Write it into the final target
+ mv $@.tmp $@
@@ -44,14 +66,14 @@ $(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir)
#
# For an example image, we'll make a PDF copy of the WFPC II image to
# display in the paper.
-delete-demodir = $(texdir)/delete-me-demo
-$(delete-demodir): | $(texdir); mkdir $@
-delete-pdf = $(delete-demodir)/wfpc2.pdf
-$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir)
+dm-histdir = $(texdir)/image-histogram
+$(dm-histdir): | $(texdir); mkdir $@
+dm-img-pdf = $(dm-histdir)/wfpc2.pdf
+$(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir)
# When the plotted values are re-made, it is necessary to also
# delete the TiKZ externalized files so the plot is also re-made.
- rm -f $(tikzdir)/delete-me-wfpc2.pdf
+ rm -f $(tikzdir)/delete-me-image-histogram.pdf
# Convert the dataset to a PDF.
astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@
@@ -63,17 +85,35 @@ $(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir)
# Histogram of WFPC2 image
# ------------------------
#
-# For an example plot, we'll show the pixel value histogram also.
-delete-histogram = $(delete-demodir)/wfpc2-hist.txt
-$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \
- | $(delete-demodir)
+# For an example plot, we'll show the pixel value histogram also. IMPORTANT
+# NOTE: because this histogram contains data that is included in a plot, we
+# should publish it, so it will go into the $(tex-publish-dir).
+dm-img-histogram = $(tex-publish-dir)/wfpc2-histogram.txt
+$(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \
+ | $(tex-publish-dir)
# When the plotted values are re-made, it is necessary to also
# delete the TiKZ externalized files so the plot is also re-made.
- rm -f $(tikzdir)/delete-me-wfpc2.pdf
+ rm -f $(tikzdir)/delete-me-image-histogram.pdf
+
+ # Generate the pixel value histogram.
+ aststatistics --lessthan=5 $< -h0 --histogram -o$@.data
+
+ # Put a two-line description of the dataset, copy the column
+ # metadata from '$@.data', and add copyright.
+ echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." \
+ > $@.tmp
+ echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp
+ echo "# " >> $@.tmp
+ awk '/^# Column .:/' $@.data >> $@.tmp
+ echo "# " >> $@.tmp
+ $(call print-copyright, $@.tmp)
- # Generate the pixel value distribution
- aststatistics --lessthan=5 $< -h0 --histogram -o$@
+ # Add the column numbers in a formatted manner, rename it to the
+ # output and clean up.
+ awk '!/^#/{printf("%-15.4f%d\n", $$1, $$2)}' $@.data >> $@.tmp
+ mv $@.tmp $@
+ rm $@.data
@@ -84,9 +124,9 @@ $(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \
#
# This is just a demonstration of how to get analysis configuration
# parameters from variables defined in `reproduce/analysis/config/'.
-delete-stats = $(delete-demodir)/wfpc2-stats.txt
-$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \
- | $(delete-demodir)
+dm-img-stats = $(dm-histdir)/wfpc2-stats.txt
+$(dm-img-stats): $(dm-histdir)/%-stats.txt: $(indir)/%.fits \
+ | $(dm-histdir)
aststatistics $< -h0 --mean --median > $@
@@ -100,11 +140,11 @@ $(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \
#
# NOTE: In LaTeX you cannot use any non-alphabetic character in a variable
# name.
-$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \
- $(delete-stats)
+$(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \
+ $(dm-img-stats)
# Write the upper limit of X that was used for the X^2 table.
- echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@
+ echo "\newcommand{\deletemenum}{$(delete-me-squared-num)}" > $@
# Note that since Make variables start with a `$(', if you want to
# use `$' within the shell (not Make), you have to quote any
@@ -116,14 +156,14 @@ $(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \
# macro definition.
mm=$$(awk 'BEGIN{min=99999; max=-min}
!/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;}
- END{print min, max}' $(delete-num));
+ END{print min, max}' $(dm-squared));
v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}');
echo "\newcommand{\deletememin}{$$v}" >> $@
v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}');
echo "\newcommand{\deletememax}{$$v}" >> $@
# Write the statistics of the WFPC2 image as a macro.
- mean=$$(awk '{printf("%.2f", $$1)}' $(delete-stats))
+ mean=$$(awk '{printf("%.2f", $$1)}' $(dm-img-stats))
echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@
- median=$$(awk '{printf("%.2f", $$2)}' $(delete-stats))
+ median=$$(awk '{printf("%.2f", $$2)}' $(dm-img-stats))
echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 4e317bb..19447a6 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -202,6 +202,16 @@ $(lockdir): | $(BDIR); mkdir $@
+# Version and distribution tarball definitions
+#
+# 'project-commit-hash' is evaluated once, when this Makefile is parsed
+# (simple ':=' assignment): it is the output of 'git describe' when a
+# '.git' directory exists in the running directory and the fixed string
+# 'NOGIT' otherwise. The package name is derived from it, and the package
+# contents directory is placed under '$(texdir)'.
+project-commit-hash := $(shell \
+    if [ -d .git ]; then git describe --dirty --always --long; \
+    else echo NOGIT; fi)
+project-package-name := maneaged-$(project-commit-hash)
+project-package-contents = $(texdir)/$(project-package-name)
+
+
+
+
+
# High-level Makefile management
# ------------------------------
#
@@ -212,11 +222,8 @@ $(lockdir): | $(BDIR); mkdir $@
# we want to ensure that the file is always built in every run: it contains
# the project version which may change between two separate runs, even when
# no file actually differs.
-packagebasename := $(shell if [ -d .git ]; then \
- echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi)
-packagecontents = $(texdir)/$(packagebasename)
-.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
- $(mtexdir)/initialize.tex
+# All the packaging targets ('dist', 'dist-zip', 'dist-lzip' and
+# 'dist-software') are commands, not files: they are declared phony so a
+# file with the same name in the running directory can never make Make
+# think they are up to date.
+.PHONY: all clean dist dist-zip dist-lzip dist-software distclean \
+        clean-mmap $(project-package-contents) $(mtexdir)/initialize.tex
# --------- Delete for no Gnuastro ---------
clean-mmap:; rm -f reproduce/config/gnuastro/mmap*
@@ -260,11 +267,11 @@ distclean: clean
# that is ready for building the final PDF with LaTeX. This is useful for
# collaborators who only want to contribute to the text of your project,
# without having to worry about the technicalities of the analysis.
-$(packagecontents): paper.pdf | $(texdir)
+$(project-package-contents): paper.pdf | $(texdir)
# Set up the output directory, delete it if it exists and remake it
# to fill with new contents.
- dir=$(texdir)/$(packagebasename)
+ dir=$@
rm -rf $$dir
mkdir $$dir
@@ -298,7 +305,7 @@ $(packagecontents): paper.pdf | $(texdir)
cp -r tex/src $$dir/tex/src
cp tex/tikz/*.pdf $$dir/tex/tikz
cp -r reproduce/* $$dir/reproduce
- cp -r tex/build/!(paper-v*) $$dir/tex/build
+ cp -r tex/build/!($(project-package-name)) $$dir/tex/build
# Clean up un-necessary/local files: 1) the $(texdir)/build*
# directories (when building in a group structure, there will be
@@ -337,32 +344,113 @@ $(packagecontents): paper.pdf | $(texdir)
# Clean temporary files (currently those ending in `~' or `.swp').
cd $(texdir)
- find $(packagebasename) -name \*~ -delete
- find $(packagebasename) -name \*.swp -delete
+ find $(project-package-name) -name \*~ -delete
+ find $(project-package-name) -name \*.swp -delete
# PROJECT SPECIFIC
# ----------------
# Put any project specific distribution steps here.
# ----------------
-# Package into `.tar.gz'.
-dist: $(packagecontents)
+# Package into `.tar.gz' or '.tar.lz'.
+dist dist-lzip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- tar -cf $(packagebasename).tar $(packagebasename)
- gzip -f --best $(packagebasename).tar
- rm -rf $(packagebasename)
+ tar -cf $(project-package-name).tar $(project-package-name)
+ if [ $@ = dist ]; then
+ suffix=gz
+ gzip -f --best $(project-package-name).tar
+ elif [ $@ = dist-lzip ]; then
+ suffix=lz
+ lzip -f --best $(project-package-name).tar
+ fi
+ rm -rf $(project-package-name)
cd $$curdir
- mv $(texdir)/$(packagebasename).tar.gz ./
+ mv $(texdir)/$(project-package-name).tar.$$suffix ./
# Package into `.zip'.
-dist-zip: $(packagecontents)
+dist-zip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- zip -q -r $(packagebasename).zip $(packagebasename)
- rm -rf $(packagebasename)
+ zip -q -r $(project-package-name).zip $(project-package-name)
+ rm -rf $(project-package-name)
+ cd $$curdir
+ mv $(texdir)/$(project-package-name).zip ./
+
+# Package the software tarballs.
+dist-software:
+ curdir=$$(pwd)
+ cd $(BDIR)
+ if [ -d .git ]; then
+ dirname="software-$$(git describe --dirty --always --long)"
+ else
+ dirname="software-NOGIT";
+ fi
+ mkdir $$dirname
+ cp -L software/tarballs/* $$dirname/
+ tar -cf $$dirname.tar $$dirname
+ gzip -f --best $$dirname.tar
+ rm -rf $$dirname
cd $$curdir
- mv $(texdir)/$(packagebasename).zip ./
+ mv $(BDIR)/$$dir.tar.gz ./
+
+
+
+
+
+# Directory containing to-be-published datasets
+# ---------------------------------------------
+#
+# It's good practice (so you don't forget in the last moment!) to have all
+# the plot/figure/table data that you ultimately want to publish in a
+# single directory.
+#
+# There are two types of to-publish data in the project.
+#
+# 1. Those data that also go into LaTeX (for example to give to LaTeX's
+# PGFPlots package to create the plot internally) should be under the
+# '$(BDIR)/tex' directory (because other LaTeX producers may also need
+# it for example when using './project make dist'). The contents of
+# this directory are directly taken into the tarball.
+#
+# 2. The data that aren't included directly in the LaTeX run of the paper,
+# can be seen as supplements. A good place to keep them is under your
+# build-directory.
+#
+# RECOMMENDATION: don't put the figure/plot/table number in the names of
+# your to-be-published datasets! Give them a descriptive/short name that
+# would be clear to anyone who has read the paper. Later, in the caption
+# (or paper's tex/appendix), you will put links to the dataset on servers
+# like Zenodo (see the "Publication checklist" in 'README-hacking.md').
+tex-publish-dir = $(texdir)/to-publish
+data-publish-dir = $(BDIR)/data-to-publish
+
+# Each directory is made with a plain 'mkdir', so its parent has to exist
+# first: the parent is given as an order-only prerequisite (after the
+# '|'), so it is only required to exist and its modification time never
+# triggers a rebuild.
+$(tex-publish-dir): | $(texdir); mkdir $@
+$(data-publish-dir): | $(BDIR); mkdir $@
+
+
+
+
+
+# Print Copyright statement
+# -------------------------
+#
+# This statement can be used in published datasets that are in plain-text
+# format. It assumes you have already put the data-specific statements in
+# the file given as its first argument; it will supplement them with
+# general project links.
+#
+# Argument:
+#   1: Name of the file to append the statement to (every line is added
+#      with '>>', so existing contents are kept).
+#
+# The arXiv and DOI lines are only printed when the respective metadata
+# variable is not empty. The values are quoted inside the tests so a value
+# that contains white space cannot break the '[' command.
+#
+# The '\#' is necessary because this is a Make variable definition: an
+# un-escaped '#' would start a Make comment.
+print-copyright = \
+	echo "\# Project title: $(metadata-title)" >> $(1); \
+	echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
+	echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
+	if [ x"$(metadata-arxiv)" != x ]; then \
+	  echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \
+	if [ x"$(metadata-doi-journal)" != x ]; then \
+	  echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
+	if [ x"$(metadata-doi-zenodo)" != x ]; then \
+	  echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \
+	echo "\#" >> $(1); \
+	echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \
+	echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \
+	echo "\# License URL: $(metadata-copyright-url)" >> $(1);
@@ -377,7 +465,6 @@ dist-zip: $(packagecontents)
# actually exists, it is also added as a `.PHONY' target above.
$(mtexdir)/initialize.tex: | $(mtexdir)
- # Version of the project.
- @if [ -d .git ]; then v=$$(git describe --dirty --always --long);
- else v=NO-GIT; fi
- echo "\newcommand{\projectversion}{$$v}" > $@
+ # Version and title of project.
+ echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
+ echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index 43d1472..67b3fea 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -40,22 +40,34 @@ verify-print-tips = \
echo "the following project source file:"; \
echo " reproduce/analysis/make/verify.mk"
-verify-txt-no-comments-leading-space = \
+# Removes following components of a plain-text file, calculates checksum
+# and compares with given checksum:
+# - All commented lines (starting with '#') are removed.
+# - All empty lines are removed.
+# - All space-characters in remaining lines are removed (so the width of
+# the printed columns won't invalidate the verification).
+#
+# It takes three arguments:
+# - First argument: Full address of file to check.
+# - Second argument: Expected checksum of the file to check.
+# - Third argument: Name of file to append the result to.
+verify-txt-no-comments-no-space = \
infile=$(strip $(1)); \
inchecksum=$(strip $(2)); \
+ innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \
if ! [ -f "$$infile" ]; then \
$(call verify-print-error-start); \
echo "The following file (that should be verified) doesn't exist:"; \
echo " $$infile"; \
echo; exit 1; \
fi; \
- checksum=$$(sed -e 's/^[[:space:]]*//g' \
+ checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \
-e 's/\#.*$$//' \
-e '/^$$/d' $$infile \
- | md5sum \
- | awk '{print $$1}'); \
+ | md5sum \
+ | awk '{print $$1}'); \
if [ x"$$inchecksum" = x"$$checksum" ]; then \
- echo "Verified: $$infile"; \
+ echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \
else \
$(call verify-print-error-start); \
$(call verify-print-tips); \
@@ -105,11 +117,20 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Make sure that verification is actually requested.
if [ x"$(verify-outputs)" = xyes ]; then
+ # Make sure the temporary output doesn't exist (because we want
+ # to append to it). We are making a temporary output target so if
+ # there is a crash in the middle, Make will not continue. If we
+ # write in the final target progressively, the file will exist,
+ # and its date will be more recent than all prerequisites, so
+ # next time the project is run, Make will continue and ignore the
+ # rest of the checks.
+ rm -f $@.tmp
+
# Verify the figure datasets.
- $(call verify-txt-no-comments-leading-space, \
- $(delete-num), ad345e873e6af577f0e4e7c8942cdf08)
- $(call verify-txt-no-comments-leading-space, \
- $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8)
+ $(call verify-txt-no-comments-no-space, \
+ $(dm-squared), 6b6d3b0f9c351de53606507b59bca5d1, $@.tmp)
+ $(call verify-txt-no-comments-no-space, \
+ $(dm-img-histogram), b1f9c413f915a1ad96078fee8767b16c, $@.tmp)
# Verify TeX macros (the values that go into the PDF text).
for m in $(verify-check); do
@@ -118,9 +139,11 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705
else echo; echo "'$$m' not recognized."; exit 1
fi
- $(call verify-txt-no-comments-leading-space, $$file, $$s)
+ $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)
done
- fi
- # Make an empty final target.
- touch $@
+ # Move temporary file to final target.
+ mv $@.tmp $@
+ else
+ echo "% Verification was DISABLED!" > $@
+ fi