diff options
Diffstat (limited to 'reproduce/analysis/make')
| -rw-r--r-- | reproduce/analysis/make/delete-me.mk | 98 | ||||
| -rw-r--r-- | reproduce/analysis/make/initialize.mk | 135 | ||||
| -rw-r--r-- | reproduce/analysis/make/verify.mk | 49 | 
3 files changed, 216 insertions, 66 deletions
diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk index fa16102..f45f9ea 100644 --- a/reproduce/analysis/make/delete-me.mk +++ b/reproduce/analysis/make/delete-me.mk @@ -22,18 +22,40 @@  # Dummy dataset  # -------------  # -# We will use AWK to generate a table showing X and X^2 and draw its plot. -delete-numdir = $(texdir)/delete-me-num -delete-num    = $(delete-numdir)/data.txt -$(delete-numdir): | $(texdir); mkdir $@ -$(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir) +# Just as a demonstration(!): we will use AWK to generate a table showing X +# and X^2 and draw its plot. +# +# Note that this dataset is directly read by LaTeX to generate a plot, so +# we need to put it in the $(tex-publish-dir) directory. +dm-squared = $(tex-publish-dir)/squared.txt +$(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir)          # When the plotted values are re-made, it is necessary to also -        # delete the TiKZ externalized files so the plot is also re-made. -	rm -f $(tikzdir)/delete-me.pdf +        # delete the TiKZ externalized files so the plot is also re-made by +        # PGFPlots. +	rm -f $(tikzdir)/delete-me-squared.pdf + +        # Write the column metadata in a temporary file name (appending +        # '.tmp' to the actual target name). Once all steps are done, it is +        # renamed to the final target. We do this because if there is an +        # error in the middle, Make will not consider the job to be +        # complete and will stop here. +	echo "# Data for demonstration plot of default Maneage (MANaging data linEAGE)." > $@.tmp +	echo "# It is a simple plot, showing the power of two: y=x^2! " >> $@.tmp +	echo "# " >> $@.tmp +	echo "# Column 1: X       [arbitrary, f32] The horizontal axis numbers." \ +	     >> $@.tmp +	echo "# Column 2: X_POW2  [arbitrary, f32] The horizontal axis to the power of two." 
\ +	     >> $@.tmp +	echo "# " >> $@.tmp +	$(call print-copyright, $@.tmp)          # Generate the table of random values. -	awk 'BEGIN {for(i=1;i<=$(delete-me-num);i+=0.5) print i, i*i; }' > $@ +	awk 'BEGIN {for(i=1;i<=$(delete-me-squared-num);i+=0.5) \ +	              printf("%-8.1f%.2f\n", i, i*i); }' >> $@.tmp + +        # Write it into the final target +	mv $@.tmp $@ @@ -44,14 +66,14 @@ $(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir)  #  # For an example image, we'll make a PDF copy of the WFPC II image to  # display in the paper. -delete-demodir = $(texdir)/delete-me-demo -$(delete-demodir): | $(texdir); mkdir $@ -delete-pdf = $(delete-demodir)/wfpc2.pdf -$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir) +dm-histdir = $(texdir)/image-histogram +$(dm-histdir): | $(texdir); mkdir $@ +dm-img-pdf = $(dm-histdir)/wfpc2.pdf +$(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir)          # When the plotted values are re-made, it is necessary to also          # delete the TiKZ externalized files so the plot is also re-made. -	rm -f $(tikzdir)/delete-me-wfpc2.pdf +	rm -f $(tikzdir)/delete-me-image-histogram.pdf          # Convert the dataset to a PDF.  	astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@ @@ -63,17 +85,35 @@ $(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir)  # Histogram of WFPC2 image  # ------------------------  # -# For an example plot, we'll show the pixel value histogram also. -delete-histogram = $(delete-demodir)/wfpc2-hist.txt -$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \ -                     | $(delete-demodir) +# For an example plot, we'll show the pixel value histogram also. IMPORTANT +# NOTE: because this histogram contains data that is included in a plot, we +# should publish it, so it will go into the $(tex-publish-dir). 
+dm-img-histogram = $(tex-publish-dir)/wfpc2-histogram.txt +$(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \ +                     | $(tex-publish-dir)          # When the plotted values are re-made, it is necessary to also          # delete the TiKZ externalized files so the plot is also re-made. -	rm -f $(tikzdir)/delete-me-wfpc2.pdf +	rm -f $(tikzdir)/delete-me-image-histogram.pdf + +        # Generate the pixel value histogram. +	aststatistics --lessthan=5 $< -h0 --histogram -o$@.data + +        # Put a two-line description of the dataset, copy the column +        # metadata from '$@.data', and add copyright. +	echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." \ +	     > $@.tmp +	echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp +	echo "# " >> $@.tmp +	awk '/^# Column .:/' $@.data >> $@.tmp +	echo "# " >> $@.tmp +	$(call print-copyright, $@.tmp) -        # Generate the pixel value distribution -	aststatistics --lessthan=5 $< -h0 --histogram -o$@ +        # Add the column numbers in a formatted manner, rename it to the +        # output and clean up. +	awk '!/^#/{printf("%-15.4f%d\n", $$1, $$2)}' $@.data >> $@.tmp +	mv $@.tmp $@ +	rm $@.data @@ -84,9 +124,9 @@ $(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \  #  # This is just as a demonstration on how to get analysic configuration  # parameters from variables defined in `reproduce/analysis/config/'. -delete-stats = $(delete-demodir)/wfpc2-stats.txt -$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \ -                 | $(delete-demodir) +dm-img-stats = $(dm-histdir)/wfpc2-stats.txt +$(dm-img-stats): $(dm-histdir)/%-stats.txt: $(indir)/%.fits \ +                 | $(dm-histdir)  	aststatistics $< -h0 --mean --median > $@ @@ -100,11 +140,11 @@ $(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \  #  # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable  # name. 
-$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \ -                          $(delete-stats) +$(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \ +                          $(dm-img-stats)          # Write the number of random values used. -	echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@ +	echo "\newcommand{\deletemenum}{$(delete-me-squared-num)}" > $@          # Note that since Make variables start with a `$(', if you want to          # use `$' within the shell (not Make), you have to quote any @@ -116,14 +156,14 @@ $(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \          # macro definition.  	mm=$$(awk 'BEGIN{min=99999; max=-min}  	           !/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;} -	           END{print min, max}' $(delete-num)); +	           END{print min, max}' $(dm-squared));  	v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}');  	echo "\newcommand{\deletememin}{$$v}"             >> $@  	v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}');  	echo "\newcommand{\deletememax}{$$v}"             >> $@          # Write the statistics of the WFPC2 image as a macro. 
-	mean=$$(awk     '{printf("%.2f", $$1)}' $(delete-stats)) +	mean=$$(awk     '{printf("%.2f", $$1)}' $(dm-img-stats))  	echo "\newcommand{\deletemewfpctwomean}{$$mean}"          >> $@ -	median=$$(awk   '{printf("%.2f", $$2)}' $(delete-stats)) +	median=$$(awk   '{printf("%.2f", $$2)}' $(dm-img-stats))  	echo "\newcommand{\deletemewfpctwomedian}{$$median}"      >> $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 4e317bb..19447a6 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -202,6 +202,16 @@ $(lockdir): | $(BDIR); mkdir $@ +# Version and distribution tarball definitions +project-commit-hash := $(shell if [ -d .git ]; then \ +    echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +project-package-name := maneaged-$(project-commit-hash) +project-package-contents = $(texdir)/$(project-package-name) + + + + +  # High-level Makefile management  # ------------------------------  # @@ -212,11 +222,8 @@ $(lockdir): | $(BDIR); mkdir $@  # we want to ensure that the file is always built in every run: it contains  # the project version which may change between two separate runs, even when  # no file actually differs. -packagebasename := $(shell if [ -d .git ]; then \ -    echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi) -packagecontents = $(texdir)/$(packagebasename) -.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \ -        $(mtexdir)/initialize.tex +.PHONY: all clean dist dist-zip dist-lzip distclean clean-mmap \ +        $(project-package-contents) $(mtexdir)/initialize.tex  # --------- Delete for no Gnuastro ---------  clean-mmap:; rm -f reproduce/config/gnuastro/mmap* @@ -260,11 +267,11 @@ distclean: clean  # that is ready for building the final PDF with LaTeX. 
This is useful for  # collaborators who only want to contribute to the text of your project,  # without having to worry about the technicalities of the analysis. -$(packagecontents): paper.pdf | $(texdir) +$(project-package-contents): paper.pdf | $(texdir)          # Set up the output directory, delete it if it exists and remake it          # to fill with new contents. -	dir=$(texdir)/$(packagebasename) +	dir=$@  	rm -rf $$dir  	mkdir $$dir @@ -298,7 +305,7 @@ $(packagecontents): paper.pdf | $(texdir)  	cp -r tex/src                            $$dir/tex/src  	cp tex/tikz/*.pdf                        $$dir/tex/tikz  	cp -r reproduce/*                        $$dir/reproduce -	cp -r tex/build/!(paper-v*)              $$dir/tex/build +	cp -r tex/build/!($(project-package-name)) $$dir/tex/build          # Clean up un-necessary/local files: 1) the $(texdir)/build*          # directories (when building in a group structure, there will be @@ -337,32 +344,113 @@ $(packagecontents): paper.pdf | $(texdir)          # Clean temporary (currently those ending in `~') files.  	cd $(texdir) -	find $(packagebasename) -name \*~ -delete -	find $(packagebasename) -name \*.swp -delete +	find $(project-package-name) -name \*~ -delete +	find $(project-package-name) -name \*.swp -delete          # PROJECT SPECIFIC          # ----------------          # Put any project specific distribution steps here.          # ---------------- -# Package into `.tar.gz'. -dist: $(packagecontents) +# Package into `.tar.gz' or '.tar.lz'. 
+dist dist-lzip: $(project-package-contents)  	curdir=$$(pwd)  	cd $(texdir) -	tar -cf $(packagebasename).tar $(packagebasename) -	gzip -f --best $(packagebasename).tar -	rm -rf $(packagebasename) +	tar -cf $(project-package-name).tar $(project-package-name) +	if [ $@ = dist ]; then +	  suffix=gz +	  gzip -f --best $(project-package-name).tar +	elif [ $@ = dist-lzip ]; then +	  suffix=lz +	  lzip -f --best $(project-package-name).tar +	fi +	rm -rf $(project-package-name)  	cd $$curdir -	mv $(texdir)/$(packagebasename).tar.gz ./ +	mv $(texdir)/$(project-package-name).tar.$$suffix ./  # Package into `.zip'. -dist-zip: $(packagecontents) +dist-zip: $(project-package-contents)  	curdir=$$(pwd)  	cd $(texdir) -	zip -q -r $(packagebasename).zip $(packagebasename) -	rm -rf $(packagebasename) +	zip -q -r $(project-package-name).zip $(project-package-name) +	rm -rf $(project-package-name) +	cd $$curdir +	mv $(texdir)/$(project-package-name).zip ./ + +# Package the software tarballs. +dist-software: +	curdir=$$(pwd) +	cd $(BDIR) +	if [ -d "$$curdir/.git" ]; then +	  dirname="software-$$(cd "$$curdir" && git describe --dirty --always --long)" +	else +	  dirname="software-NOGIT"; +	fi +	mkdir $$dirname +	cp -L software/tarballs/* $$dirname/ +	tar -cf $$dirname.tar $$dirname +	gzip -f --best $$dirname.tar +	rm -rf $$dirname  	cd $$curdir -	mv $(texdir)/$(packagebasename).zip ./ +	mv $(BDIR)/$$dirname.tar.gz ./ + + + + + +# Directory containing to-be-published datasets +# --------------------------------------------- +# +# It's good practice (so you don't forget in the last moment!) to have all +# the plot/figure/table data that you ultimately want to publish in a +# single directory. +# +# There are two types of to-publish data in the project. +# +#  1. 
Those data that also go into LaTeX (for example to give to LaTeX's +#     PGFPlots package to create the plot internally) should be under the +#     '$(BDIR)/tex' directory (because other LaTeX producers may also need +#     it for example when using './project make dist'). The contents of +#     this directory are directly taken into the tarball. +# +#  2. The data that aren't included directly in the LaTeX run of the paper, +#     can be seen as supplements. A good place to keep them is under your +#     build-directory. +# +# RECOMMENDATION: don't put the figure/plot/table number in the names of +# your to-be-published datasets! Give them a descriptive/short name that +# would be clear to anyone who has read the paper. Later, in the caption +# (or paper's tex/appendix), you will put links to the dataset on servers +# like Zenodo (see the "Publication checklist" in 'README-hacking.md'). +tex-publish-dir = $(texdir)/to-publish +data-publish-dir = $(BDIR)/data-to-publish +$(tex-publish-dir): | $(texdir); mkdir $@ +$(data-publish-dir): | $(BDIR); mkdir $@ + + + + + +# Print Copyright statement +# ------------------------- +# +# This statement can be used in published datasets that are in plain-text +# format. It assumes you have already put the data-specific statements in +# its first argument; it will supplement them with general project links. 
+print-copyright = \ +	echo "\# Project title: $(metadata-title)" >> $(1); \ +	echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \ +	echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ +	if [ x$(metadata-arxiv) != x ]; then \ +	  echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \ +	if [ x$(metadata-doi-journal) != x ]; then \ +	  echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ +	if [ x$(metadata-doi-zenodo) != x ]; then \ +	echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \ +	echo "\#" >> $(1); \ +	echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \ +	echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \ +	echo "\# License URL: $(metadata-copyright-url)" >> $(1); @@ -377,7 +465,6 @@ dist-zip: $(packagecontents)  # actually exists, it is also aded as a `.PHONY' target above.  $(mtexdir)/initialize.tex: | $(mtexdir) -        # Version of the project. -	@if [ -d .git ]; then v=$$(git describe --dirty --always --long); -	else                  v=NO-GIT; fi -	echo "\newcommand{\projectversion}{$$v}" > $@ +        # Version and title of project. +	echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ +	echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 43d1472..67b3fea 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -40,22 +40,34 @@ verify-print-tips = \    echo "the following project source file:"; \    echo "    reproduce/analysis/make/verify.mk" -verify-txt-no-comments-leading-space = \ +# Removes following components of a plain-text file, calculates checksum +# and compares with given checksum: +#   - All commented lines (starting with '#') are removed. +#   - All empty lines are removed. 
+#   - All space-characters in remaining lines are removed (so the width of +#     the printed columns won't invalidate the verification). +# +# It takes three arguments: +#   - First argument: Full address of file to check. +#   - Second argument: Expected checksum of the file to check. +#   - File name to write result. +verify-txt-no-comments-no-space = \    infile=$(strip $(1)); \    inchecksum=$(strip $(2)); \ +  innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \    if ! [ -f "$$infile" ]; then \      $(call verify-print-error-start); \      echo "The following file (that should be verified) doesn't exist:"; \      echo "    $$infile"; \      echo; exit 1; \    fi; \ -  checksum=$$(sed -e 's/^[[:space:]]*//g' \ +  checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \                    -e 's/\#.*$$//' \                    -e '/^$$/d' $$infile \ -	          | md5sum \ -	          | awk '{print $$1}'); \ +                  | md5sum \ +                  | awk '{print $$1}'); \    if [ x"$$inchecksum" = x"$$checksum" ]; then \ -    echo "Verified: $$infile"; \ +    echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \    else \      $(call verify-print-error-start); \      $(call verify-print-tips); \ @@ -105,11 +117,20 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)          # Make sure that verification is actually requested.  	if [ x"$(verify-outputs)" = xyes ]; then +          # Make sure the temporary output doesn't exist (because we want +          # to append to it). We are making a temporary output target so if +          # there is a crash in the middle, Make will not continue. If we +          # write in the final target progressively, the file will exist, +          # and its date will be more recent than all prerequisites, so +          # next time the project is run, Make will continue and ignore the +          # rest of the checks. +	  rm -f $@.tmp +            # Verify the figure datasets. 
-	  $(call verify-txt-no-comments-leading-space, \ -	         $(delete-num), ad345e873e6af577f0e4e7c8942cdf08) -	  $(call verify-txt-no-comments-leading-space, \ -	         $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8) +	  $(call verify-txt-no-comments-no-space, \ +	         $(dm-squared), 6b6d3b0f9c351de53606507b59bca5d1, $@.tmp) +	  $(call verify-txt-no-comments-no-space, \ +	         $(dm-img-histogram), b1f9c413f915a1ad96078fee8767b16c, $@.tmp)            # Verify TeX macros (the values that go into the PDF text).  	  for m in $(verify-check); do @@ -118,9 +139,11 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)  	    elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705  	    else echo; echo "'$$m' not recognized."; exit 1  	    fi -	    $(call verify-txt-no-comments-leading-space, $$file, $$s) +	    $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)  	  done -	fi -        # Make an empty final target. -	touch $@ +          # Move temporary file to final target. +	  mv $@.tmp $@ +	else +	  echo "% Verification was DISABLED!" > $@ +	fi  | 
