3 files changed, 216 insertions, 66 deletions
diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk
index fa16102..f45f9ea 100644
--- a/reproduce/analysis/make/delete-me.mk
+++ b/reproduce/analysis/make/delete-me.mk
@@ -22,18 +22,40 @@
 # Dummy dataset
 # -------------
 #
-# We will use AWK to generate a table showing X and X^2 and draw its plot.
-delete-numdir = $(texdir)/delete-me-num
-delete-num    = $(delete-numdir)/data.txt
-$(delete-numdir): | $(texdir); mkdir $@
-$(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir)
+# Just as a demonstration(!): we will use AWK to generate a table showing X
+# and X^2 and draw its plot.
+#
+# Note that this dataset is directly read by LaTeX to generate a plot, so
+# we need to put it in the $(tex-publish-dir) directory.
+dm-squared = $(tex-publish-dir)/squared.txt
+$(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir)
 
         # When the plotted values are re-made, it is necessary to also
-        # delete the TiKZ externalized files so the plot is also re-made.
-	rm -f $(tikzdir)/delete-me.pdf
+        # delete the TiKZ externalized files so the plot is also re-made by
+        # PGFPlots.
+	rm -f $(tikzdir)/delete-me-squared.pdf
+
+        # Write the column metadata into a temporary file (the target's
+        # name with '.tmp' appended), which is renamed to the final target
+        # only as the last step. This way, if the recipe stops on an error
+        # before that final 'mv', the target is not updated and Make will
+        # not treat a partially written file as a completed job.
+	echo "# Data for demonstration plot of default Maneage (MANaging data linEAGE)." > $@.tmp
+	echo "# It is a simple plot, showing the power of two: y=x^2! " >> $@.tmp
+	echo "# " >> $@.tmp
+	echo "# Column 1: X       [arbitrary, f32] The horizontal axis numbers." \
+	     >> $@.tmp
+	echo "# Column 2: X_POW2  [arbitrary, f32] The horizontal axis to the power of two." \
+	     >> $@.tmp
+	echo "# " >> $@.tmp
+	$(call print-copyright, $@.tmp)
 
         # Generate the table of random values.
-	awk 'BEGIN {for(i=1;i<=$(delete-me-num);i+=0.5) print i, i*i; }' > $@
+	awk 'BEGIN {for(i=1;i<=$(delete-me-squared-num);i+=0.5) \
+	              printf("%-8.1f%.2f\n", i, i*i); }' >> $@.tmp
+
+        # Write it into the final target
+	mv $@.tmp $@
 
 
 
@@ -44,14 +66,14 @@ $(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir)
 #
 # For an example image, we'll make a PDF copy of the WFPC II image to
 # display in the paper.
-delete-demodir = $(texdir)/delete-me-demo
-$(delete-demodir): | $(texdir); mkdir $@
-delete-pdf = $(delete-demodir)/wfpc2.pdf
-$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir)
+dm-histdir = $(texdir)/image-histogram
+$(dm-histdir): | $(texdir); mkdir $@
+dm-img-pdf = $(dm-histdir)/wfpc2.pdf
+$(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir)
 
         # When the plotted values are re-made, it is necessary to also
         # delete the TiKZ externalized files so the plot is also re-made.
-	rm -f $(tikzdir)/delete-me-wfpc2.pdf
+	rm -f $(tikzdir)/delete-me-image-histogram.pdf
 
         # Convert the dataset to a PDF.
 	astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@
@@ -63,17 +85,35 @@ $(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir)
 # Histogram of WFPC2 image
 # ------------------------
 #
-# For an example plot, we'll show the pixel value histogram also.
-delete-histogram = $(delete-demodir)/wfpc2-hist.txt
-$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \
-                     | $(delete-demodir)
+# For an example plot, we'll show the pixel value histogram also. IMPORTANT
+# NOTE: because this histogram contains data that is included in a plot, we
+# should publish it, so it will go into the $(tex-publish-dir).
+dm-img-histogram = $(tex-publish-dir)/wfpc2-histogram.txt
+$(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \
+                     | $(tex-publish-dir)
 
         # When the plotted values are re-made, it is necessary to also
         # delete the TiKZ externalized files so the plot is also re-made.
-	rm -f $(tikzdir)/delete-me-wfpc2.pdf
+	rm -f $(tikzdir)/delete-me-image-histogram.pdf
+
+        # Generate the pixel value histogram.
+	aststatistics --lessthan=5 $< -h0 --histogram -o$@.data
+
+        # Put a two-line description of the dataset, copy the column
+        # metadata from '$@.data', and add copyright.
+	echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." \
+	     > $@.tmp
+	echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp
+	echo "# " >> $@.tmp
+	awk '/^# Column .:/' $@.data >> $@.tmp
+	echo "# " >> $@.tmp
+	$(call print-copyright, $@.tmp)
 
-        # Generate the pixel value distribution
-	aststatistics --lessthan=5 $< -h0 --histogram -o$@
+        # Append the non-comment rows of '$@.data' as formatted columns,
+        # rename the temporary file to the target and delete '$@.data'.
+	awk '!/^#/{printf("%-15.4f%d\n", $$1, $$2)}' $@.data >> $@.tmp
+	mv $@.tmp $@
+	rm $@.data
 
 
 
@@ -84,9 +124,9 @@ $(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \
 #
 # This is just as a demonstration on how to get analysic configuration
 # parameters from variables defined in `reproduce/analysis/config/'.
-delete-stats = $(delete-demodir)/wfpc2-stats.txt
-$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \
-                 | $(delete-demodir)
+dm-img-stats = $(dm-histdir)/wfpc2-stats.txt
+$(dm-img-stats): $(dm-histdir)/%-stats.txt: $(indir)/%.fits \
+                 | $(dm-histdir)
 	aststatistics $< -h0 --mean --median > $@
 
 
@@ -100,11 +140,11 @@ $(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \
 #
 # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable
 # name.
-$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \
-                          $(delete-stats)
+$(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \
+                          $(dm-img-stats)
 
         # Write the number of random values used.
-	echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@
+	echo "\newcommand{\deletemenum}{$(delete-me-squared-num)}" > $@
 
         # Note that since Make variables start with a `$(', if you want to
         # use `$' within the shell (not Make), you have to quote any
@@ -116,14 +156,14 @@ $(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \
         # macro definition.
 	mm=$$(awk 'BEGIN{min=99999; max=-min}
 	           !/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;}
-	           END{print min, max}' $(delete-num));
+	           END{print min, max}' $(dm-squared));
 	v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}');
 	echo "\newcommand{\deletememin}{$$v}"             >> $@
 	v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}');
 	echo "\newcommand{\deletememax}{$$v}"             >> $@
 
         # Write the statistics of the WFPC2 image as a macro.
-	mean=$$(awk     '{printf("%.2f", $$1)}' $(delete-stats))
+	mean=$$(awk     '{printf("%.2f", $$1)}' $(dm-img-stats))
 	echo "\newcommand{\deletemewfpctwomean}{$$mean}"          >> $@
-	median=$$(awk   '{printf("%.2f", $$2)}' $(delete-stats))
+	median=$$(awk   '{printf("%.2f", $$2)}' $(dm-img-stats))
 	echo "\newcommand{\deletemewfpctwomedian}{$$median}"      >> $@
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 4e317bb..19447a6 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -202,6 +202,16 @@ $(lockdir): | $(BDIR); mkdir $@
 
 
 
+# Version and distribution tarball definitions
+project-commit-hash := $(shell if [ -d .git ]; then \
+    echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
+project-package-name := maneaged-$(project-commit-hash)
+project-package-contents = $(texdir)/$(project-package-name)
+
+
+
+
+
 # High-level Makefile management
 # ------------------------------
 #
@@ -212,11 +222,8 @@ $(lockdir): | $(BDIR); mkdir $@
 # we want to ensure that the file is always built in every run: it contains
 # the project version which may change between two separate runs, even when
 # no file actually differs.
-packagebasename := $(shell if [ -d .git ]; then \
-    echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi)
-packagecontents = $(texdir)/$(packagebasename)
-.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
-        $(mtexdir)/initialize.tex
+.PHONY: all clean dist dist-zip dist-lzip dist-software distclean \
+        clean-mmap $(project-package-contents) $(mtexdir)/initialize.tex
 
 # --------- Delete for no Gnuastro ---------
 clean-mmap:; rm -f reproduce/config/gnuastro/mmap*
@@ -260,11 +267,11 @@ distclean: clean
 # that is ready for building the final PDF with LaTeX. This is useful for
 # collaborators who only want to contribute to the text of your project,
 # without having to worry about the technicalities of the analysis.
-$(packagecontents): paper.pdf | $(texdir)
+$(project-package-contents): paper.pdf | $(texdir)
 
         # Set up the output directory, delete it if it exists and remake it
         # to fill with new contents.
-	dir=$(texdir)/$(packagebasename)
+	dir=$@
 	rm -rf $$dir
 	mkdir $$dir
 
@@ -298,7 +305,7 @@ $(packagecontents): paper.pdf | $(texdir)
 	cp -r tex/src                            $$dir/tex/src
 	cp tex/tikz/*.pdf                        $$dir/tex/tikz
 	cp -r reproduce/*                        $$dir/reproduce
-	cp -r tex/build/!(paper-v*)              $$dir/tex/build
+	cp -r tex/build/!($(project-package-name)) $$dir/tex/build
 
         # Clean up un-necessary/local files: 1) the $(texdir)/build*
         # directories (when building in a group structure, there will be
@@ -337,32 +344,113 @@ $(packagecontents): paper.pdf | $(texdir)
 
         # Clean temporary (currently those ending in `~') files.
 	cd $(texdir)
-	find $(packagebasename) -name \*~ -delete
-	find $(packagebasename) -name \*.swp -delete
+	find $(project-package-name) -name \*~ -delete
+	find $(project-package-name) -name \*.swp -delete
 
         # PROJECT SPECIFIC
         # ----------------
         # Put any project specific distribution steps here.
         # ----------------
 
-# Package into `.tar.gz'.
-dist: $(packagecontents)
+# Package into '.tar.gz' (the 'dist' target) or '.tar.lz' ('dist-lzip').
+dist dist-lzip: $(project-package-contents)
 	curdir=$$(pwd)
 	cd $(texdir)
-	tar -cf $(packagebasename).tar $(packagebasename)
-	gzip -f --best $(packagebasename).tar
-	rm -rf $(packagebasename)
+	tar -cf $(project-package-name).tar $(project-package-name)
+	if [ $@ = dist ]; then
+	  suffix=gz
+	  gzip -f --best $(project-package-name).tar
+	elif [ $@ = dist-lzip ]; then
+	  suffix=lz
+	  lzip -f --best $(project-package-name).tar
+	fi
+	rm -rf $(project-package-name)
 	cd $$curdir
-	mv $(texdir)/$(packagebasename).tar.gz ./
+	mv $(texdir)/$(project-package-name).tar.$$suffix ./
 
 # Package into `.zip'.
-dist-zip: $(packagecontents)
+dist-zip: $(project-package-contents)
 	curdir=$$(pwd)
 	cd $(texdir)
-	zip -q -r $(packagebasename).zip $(packagebasename)
-	rm -rf $(packagebasename)
+	zip -q -r $(project-package-name).zip $(project-package-name)
+	rm -rf $(project-package-name)
+	cd $$curdir
+	mv $(texdir)/$(project-package-name).zip ./
+
+# Package the software tarballs into 'software-HASH.tar.gz'. The name uses
+# $(project-commit-hash), which is evaluated once when the Makefiles are
+# read (before any 'cd'): checking for '.git' after the 'cd $(BDIR)' below
+# would look in the build directory, not in the project's source.
+dist-software:
+	curdir=$$(pwd)
+	dirname=software-$(project-commit-hash)
+	cd $(BDIR)
+	rm -rf $$dirname
+	mkdir $$dirname
+	cp -L software/tarballs/* $$dirname/
+	tar -cf $$dirname.tar $$dirname
+	gzip -f --best $$dirname.tar
+	rm -rf $$dirname
 	cd $$curdir
-	mv $(texdir)/$(packagebasename).zip ./
+	mv $(BDIR)/$$dirname.tar.gz ./
+
+
+
+
+
+# Directory containing to-be-published datasets
+# ---------------------------------------------
+#
+# It's good practice (so you don't forget at the last moment!) to have all
+# the plot/figure/table data that you ultimately want to publish in a
+# single directory.
+#
+# There are two types of to-publish data in the project.
+#
+#  1. Those data that also go into LaTeX (for example to give to LaTeX's
+#     PGFPlots package to create the plot internally) should be under the
+#     '$(BDIR)/tex' directory (because other LaTeX producers may also need
+#     it for example when using './project make dist'). The contents of
+#     this directory are directly taken into the tarball.
+#
+#  2. The data that aren't included directly in the LaTeX run of the paper,
+#     can be seen as supplements. A good place to keep them is under your
+#     build-directory.
+#
+# RECOMMENDATION: don't put the figure/plot/table number in the names of
+# your to-be-published datasets! Give them a descriptive/short name that
+# would be clear to anyone who has read the paper. Later, in the caption
+# (or paper's tex/appendix), you will put links to the dataset on servers
+# like Zenodo (see the "Publication checklist" in 'README-hacking.md').
+tex-publish-dir = $(texdir)/to-publish
+data-publish-dir = $(BDIR)/data-to-publish
+$(tex-publish-dir): | $(texdir); mkdir $@
+$(data-publish-dir): | $(BDIR); mkdir $@
+
+
+
+
+
+# Print Copyright statement
+# -------------------------
+#
+# Appends general project links and the copyright notice to the plain-text
+# file given as first argument, which should already contain the
+# data-specific statements. Usage: '$(call print-copyright, FILE)'.
+print-copyright = \
+	echo "\# Project title: $(metadata-title)" >> $(1); \
+	echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
+	echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
+	if [ x"$(metadata-arxiv)" != x ]; then \
+	  echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \
+	if [ x"$(metadata-doi-journal)" != x ]; then \
+	  echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
+	if [ x"$(metadata-doi-zenodo)" != x ]; then \
+	  echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \
+	echo "\#" >> $(1); \
+	echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \
+	echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \
+	echo "\# License URL: $(metadata-copyright-url)" >> $(1);
 
 
 
@@ -377,7 +465,6 @@ dist-zip: $(packagecontents)
 # actually exists, it is also aded as a `.PHONY' target above.
 $(mtexdir)/initialize.tex: | $(mtexdir)
 
-        # Version of the project.
-	@if [ -d .git ]; then v=$$(git describe --dirty --always --long);
-	else                  v=NO-GIT; fi
-	echo "\newcommand{\projectversion}{$$v}" > $@
+        # Version and title of project.
+	echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
+	echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index 43d1472..67b3fea 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -40,22 +40,34 @@ verify-print-tips = \
   echo "the following project source file:"; \
   echo "    reproduce/analysis/make/verify.mk"
 
-verify-txt-no-comments-leading-space = \
+# Removes the following components of a plain-text file, calculates the
+# checksum of the result and compares it with the given checksum:
+#   - All comments (from a '#' to the end of the line) are removed.
+#   - All empty lines are removed.
+#   - All space-characters in remaining lines are removed (so the width of
+#     the printed columns won't invalidate the verification).
+#
+# It takes three arguments:
+#   - First argument: Full address of file to check.
+#   - Second argument: Expected checksum of the file to check.
+#   - Third argument: Name of the file to append the result to.
+verify-txt-no-comments-no-space = \
   infile=$(strip $(1)); \
   inchecksum=$(strip $(2)); \
+  innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \
   if ! [ -f "$$infile" ]; then \
     $(call verify-print-error-start); \
     echo "The following file (that should be verified) doesn't exist:"; \
     echo "    $$infile"; \
     echo; exit 1; \
   fi; \
-  checksum=$$(sed -e 's/^[[:space:]]*//g' \
+  checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \
                   -e 's/\#.*$$//' \
                   -e '/^$$/d' $$infile \
-	          | md5sum \
-	          | awk '{print $$1}'); \
+                  | md5sum \
+                  | awk '{print $$1}'); \
   if [ x"$$inchecksum" = x"$$checksum" ]; then \
-    echo "Verified: $$infile"; \
+    echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \
   else \
     $(call verify-print-error-start); \
     $(call verify-print-tips); \
@@ -105,11 +117,20 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
         # Make sure that verification is actually requested.
 	if [ x"$(verify-outputs)" = xyes ]; then
 
+          # Make sure the temporary output doesn't exist (because we want
+          # to append to it). We are making a temporary output target so if
+          # there is a crash in the middle, Make will not continue. If we
+          # write in the final target progressively, the file will exist,
+          # and its date will be more recent than all prerequisites, so
+          # next time the project is run, Make will continue and ignore the
+          # rest of the checks.
+	  rm -f $@.tmp
+
           # Verify the figure datasets.
-	  $(call verify-txt-no-comments-leading-space, \
-	         $(delete-num), ad345e873e6af577f0e4e7c8942cdf08)
-	  $(call verify-txt-no-comments-leading-space, \
-	         $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8)
+	  $(call verify-txt-no-comments-no-space, \
+	         $(dm-squared), 6b6d3b0f9c351de53606507b59bca5d1, $@.tmp)
+	  $(call verify-txt-no-comments-no-space, \
+	         $(dm-img-histogram), b1f9c413f915a1ad96078fee8767b16c, $@.tmp)
 
           # Verify TeX macros (the values that go into the PDF text).
 	  for m in $(verify-check); do
@@ -118,9 +139,11 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
 	    elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705
 	    else echo; echo "'$$m' not recognized."; exit 1
 	    fi
-	    $(call verify-txt-no-comments-leading-space, $$file, $$s)
+	    $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)
 	  done
-	fi
 
-        # Make an empty final target.
-	touch $@
+          # Move temporary file to final target.
+	  mv $@.tmp $@
+	else
+	  echo "% Verification was DISABLED!" > $@
+	fi