diff options
Diffstat (limited to 'reproduce/analysis')
-rw-r--r-- | reproduce/analysis/config/delete-me-squared-num.conf (renamed from reproduce/analysis/config/delete-me-num.conf) | 2 | ||||
-rw-r--r-- | reproduce/analysis/config/metadata.conf | 25 | ||||
-rw-r--r-- | reproduce/analysis/make/delete-me.mk | 98 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 135 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 49 |
5 files changed, 242 insertions, 67 deletions
diff --git a/reproduce/analysis/config/delete-me-num.conf b/reproduce/analysis/config/delete-me-squared-num.conf index a0260b8..c86f841 100644 --- a/reproduce/analysis/config/delete-me-num.conf +++ b/reproduce/analysis/config/delete-me-squared-num.conf @@ -6,4 +6,4 @@ # permitted in any medium without royalty provided the copyright notice and # this notice are preserved. This file is offered as-is, without any # warranty. -delete-me-num = 50 +delete-me-squared-num = 50 diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf new file mode 100644 index 0000000..533d927 --- /dev/null +++ b/reproduce/analysis/config/metadata.conf @@ -0,0 +1,25 @@ +# Project meta-data that can be used in a project's output datasets and +# final paper. Please set the values here and use them in your analysis or +# paper, don't repeat them +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice and +# this notice are preserved. This file is offered as-is, without any +# warranty. + +# Project information +metadata-title = The project title goes here + +# DOIs and identifiers. +metadata-arxiv = +metadata-doi-zenodo = +metadata-doi-journal = +metadata-doi = $(metadata-doi-journal) +metadata-git-repository = http://git.maneage.org/project.git + +# DATA Copyright owner and license information. 
+metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org> +metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA) +metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0 diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk index fa16102..f45f9ea 100644 --- a/reproduce/analysis/make/delete-me.mk +++ b/reproduce/analysis/make/delete-me.mk @@ -22,18 +22,40 @@ # Dummy dataset # ------------- # -# We will use AWK to generate a table showing X and X^2 and draw its plot. -delete-numdir = $(texdir)/delete-me-num -delete-num = $(delete-numdir)/data.txt -$(delete-numdir): | $(texdir); mkdir $@ -$(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir) +# Just as a demonstration(!): we will use AWK to generate a table showing X +# and X^2 and draw its plot. +# +# Note that this dataset is directly read by LaTeX to generate a plot, so +# we need to put it in the $(tex-publish-dir) directory. +dm-squared = $(tex-publish-dir)/squared.txt +$(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir) # When the plotted values are re-made, it is necessary to also - # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me.pdf + # delete the TiKZ externalized files so the plot is also re-made by + # PGFPlots. + rm -f $(tikzdir)/delete-me-squared.pdf + + # Write the column metadata in a temporary file name (appending + # '.tmp' to the actual target name). Once all steps are done, it is + # renamed to the final target. We do this because if there is an + # error in the middle, Make will not consider the job to be + # complete and will stop here. + echo "# Data for demonstration plot of default Maneage (MANaging data linEAGE)." > $@.tmp + echo "# It is a simple plot, showing the power of two: y=x^2! " >> $@.tmp + echo "# " >> $@.tmp + echo "# Column 1: X [arbitrary, f32] The horizontal axis numbers." 
\ + >> $@.tmp + echo "# Column 2: X_POW2 [arbitrary, f32] The horizontal axis to the power of two." \ + >> $@.tmp + echo "# " >> $@.tmp + $(call print-copyright, $@.tmp) # Generate the table of random values. - awk 'BEGIN {for(i=1;i<=$(delete-me-num);i+=0.5) print i, i*i; }' > $@ + awk 'BEGIN {for(i=1;i<=$(delete-me-squared-num);i+=0.5) \ + printf("%-8.1f%.2f\n", i, i*i); }' >> $@.tmp + + # Write it into the final target + mv $@.tmp $@ @@ -44,14 +66,14 @@ $(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir) # # For an example image, we'll make a PDF copy of the WFPC II image to # display in the paper. -delete-demodir = $(texdir)/delete-me-demo -$(delete-demodir): | $(texdir); mkdir $@ -delete-pdf = $(delete-demodir)/wfpc2.pdf -$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir) +dm-histdir = $(texdir)/image-histogram +$(dm-histdir): | $(texdir); mkdir $@ +dm-img-pdf = $(dm-histdir)/wfpc2.pdf +$(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir) # When the plotted values are re-made, it is necessary to also # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me-wfpc2.pdf + rm -f $(tikzdir)/delete-me-image-histogram.pdf # Convert the dataset to a PDF. astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@ @@ -63,17 +85,35 @@ $(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir) # Histogram of WFPC2 image # ------------------------ # -# For an example plot, we'll show the pixel value histogram also. -delete-histogram = $(delete-demodir)/wfpc2-hist.txt -$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \ - | $(delete-demodir) +# For an example plot, we'll show the pixel value histogram also. IMPORTANT +# NOTE: because this histogram contains data that is included in a plot, we +# should publish it, so it will go into the $(tex-publish-dir). 
+dm-img-histogram = $(tex-publish-dir)/wfpc2-histogram.txt +$(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \ + | $(tex-publish-dir) # When the plotted values are re-made, it is necessary to also # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me-wfpc2.pdf + rm -f $(tikzdir)/delete-me-image-histogram.pdf + + # Generate the pixel value histogram. + aststatistics --lessthan=5 $< -h0 --histogram -o$@.data + + # Put a two-line description of the dataset, copy the column + # metadata from '$@.data', and add copyright. + echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." \ + > $@.tmp + echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp + echo "# " >> $@.tmp + awk '/^# Column .:/' $@.data >> $@.tmp + echo "# " >> $@.tmp + $(call print-copyright, $@.tmp) - # Generate the pixel value distribution - aststatistics --lessthan=5 $< -h0 --histogram -o$@ + # Add the column numbers in a formatted manner, rename it to the + # output and clean up. + awk '!/^#/{printf("%-15.4f%d\n", $$1, $$2)}' $@.data >> $@.tmp + mv $@.tmp $@ + rm $@.data @@ -84,9 +124,9 @@ $(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \ # # This is just as a demonstration on how to get analysic configuration # parameters from variables defined in `reproduce/analysis/config/'. -delete-stats = $(delete-demodir)/wfpc2-stats.txt -$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \ - | $(delete-demodir) +dm-img-stats = $(dm-histdir)/wfpc2-stats.txt +$(dm-img-stats): $(dm-histdir)/%-stats.txt: $(indir)/%.fits \ + | $(dm-histdir) aststatistics $< -h0 --mean --median > $@ @@ -100,11 +140,11 @@ $(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \ # # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable # name. 
-$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \ - $(delete-stats) +$(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \ + $(dm-img-stats) # Write the number of random values used. - echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@ + echo "\newcommand{\deletemenum}{$(delete-me-squared-num)}" > $@ # Note that since Make variables start with a `$(', if you want to # use `$' within the shell (not Make), you have to quote any @@ -116,14 +156,14 @@ $(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \ # macro definition. mm=$$(awk 'BEGIN{min=99999; max=-min} !/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;} - END{print min, max}' $(delete-num)); + END{print min, max}' $(dm-squared)); v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}'); echo "\newcommand{\deletememin}{$$v}" >> $@ v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); echo "\newcommand{\deletememax}{$$v}" >> $@ # Write the statistics of the WFPC2 image as a macro. 
- mean=$$(awk '{printf("%.2f", $$1)}' $(delete-stats)) + mean=$$(awk '{printf("%.2f", $$1)}' $(dm-img-stats)) echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@ - median=$$(awk '{printf("%.2f", $$2)}' $(delete-stats)) + median=$$(awk '{printf("%.2f", $$2)}' $(dm-img-stats)) echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 4e317bb..19447a6 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -202,6 +202,16 @@ $(lockdir): | $(BDIR); mkdir $@ +# Version and distribution tarball definitions +project-commit-hash := $(shell if [ -d .git ]; then \ + echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +project-package-name := maneaged-$(project-commit-hash) +project-package-contents = $(texdir)/$(project-package-name) + + + + + # High-level Makefile management # ------------------------------ # @@ -212,11 +222,8 @@ $(lockdir): | $(BDIR); mkdir $@ # we want to ensure that the file is always built in every run: it contains # the project version which may change between two separate runs, even when # no file actually differs. -packagebasename := $(shell if [ -d .git ]; then \ - echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi) -packagecontents = $(texdir)/$(packagebasename) -.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \ - $(mtexdir)/initialize.tex +.PHONY: all clean dist dist-zip dist-lzip distclean clean-mmap \ + $(project-package-contents) $(mtexdir)/initialize.tex # --------- Delete for no Gnuastro --------- clean-mmap:; rm -f reproduce/config/gnuastro/mmap* @@ -260,11 +267,11 @@ distclean: clean # that is ready for building the final PDF with LaTeX. This is useful for # collaborators who only want to contribute to the text of your project, # without having to worry about the technicalities of the analysis. 
-$(packagecontents): paper.pdf | $(texdir) +$(project-package-contents): paper.pdf | $(texdir) # Set up the output directory, delete it if it exists and remake it # to fill with new contents. - dir=$(texdir)/$(packagebasename) + dir=$@ rm -rf $$dir mkdir $$dir @@ -298,7 +305,7 @@ $(packagecontents): paper.pdf | $(texdir) cp -r tex/src $$dir/tex/src cp tex/tikz/*.pdf $$dir/tex/tikz cp -r reproduce/* $$dir/reproduce - cp -r tex/build/!(paper-v*) $$dir/tex/build + cp -r tex/build/!($(project-package-name)) $$dir/tex/build # Clean up un-necessary/local files: 1) the $(texdir)/build* # directories (when building in a group structure, there will be @@ -337,32 +344,113 @@ $(packagecontents): paper.pdf | $(texdir) # Clean temporary (currently those ending in `~') files. cd $(texdir) - find $(packagebasename) -name \*~ -delete - find $(packagebasename) -name \*.swp -delete + find $(project-package-name) -name \*~ -delete + find $(project-package-name) -name \*.swp -delete # PROJECT SPECIFIC # ---------------- # Put any project specific distribution steps here. # ---------------- -# Package into `.tar.gz'. -dist: $(packagecontents) +# Package into `.tar.gz' or '.tar.lz'. +dist dist-lzip: $(project-package-contents) curdir=$$(pwd) cd $(texdir) - tar -cf $(packagebasename).tar $(packagebasename) - gzip -f --best $(packagebasename).tar - rm -rf $(packagebasename) + tar -cf $(project-package-name).tar $(project-package-name) + if [ $@ = dist ]; then + suffix=gz + gzip -f --best $(project-package-name).tar + elif [ $@ = dist-lzip ]; then + suffix=lz + lzip -f --best $(project-package-name).tar + fi + rm -rf $(project-package-name) cd $$curdir - mv $(texdir)/$(packagebasename).tar.gz ./ + mv $(texdir)/$(project-package-name).tar.$$suffix ./ # Package into `.zip'. 
-dist-zip: $(packagecontents)
+dist-zip: $(project-package-contents)
 	curdir=$$(pwd)
 	cd $(texdir)
-	zip -q -r $(packagebasename).zip $(packagebasename)
-	rm -rf $(packagebasename)
+	zip -q -r $(project-package-name).zip $(project-package-name)
+	rm -rf $(project-package-name)
+	cd $$curdir
+	mv $(texdir)/$(project-package-name).zip ./
+
+# Package the software tarballs into a single '.tar.gz' that is named
+# after the project's Git description and placed in the top directory.
+dist-software:
+	curdir=$$(pwd)
+
+	# Set the tarball name BEFORE going into the build directory: the
+	# '.git' test and 'git describe' have to run in the project's
+	# source directory (the build directory is not expected to have
+	# a '.git' of its own).
+	if [ -d .git ]; then
+	  dirname="software-$$(git describe --dirty --always --long)"
+	else
+	  dirname="software-NOGIT";
+	fi
+	cd $(BDIR)
+	mkdir $$dirname
+	cp -L software/tarballs/* $$dirname/
+	tar -cf $$dirname.tar $$dirname
+	gzip -f --best $$dirname.tar
+	rm -rf $$dirname
 	cd $$curdir
-	mv $(texdir)/$(packagebasename).zip ./
+	mv $(BDIR)/$$dirname.tar.gz ./
+
+
+
+
+
+# Directory containing to-be-published datasets
+# ---------------------------------------------
+#
+# It's good practice (so you don't forget at the last moment!) to have all
+# the plot/figure/table data that you ultimately want to publish in a
+# single directory.
+#
+# There are two types of to-publish data in the project.
+#
+# 1. Those data that also go into LaTeX (for example to give to LaTeX's
+#    PGFPlots package to create the plot internally) should be under the
+#    '$(BDIR)/tex' directory (because other LaTeX producers may also need
+#    it for example when using './project make dist'). The contents of
+#    this directory are directly taken into the tarball.
+#
+# 2. The data that aren't included directly in the LaTeX run of the paper,
+#    can be seen as supplements. A good place to keep them is under your
+#    build-directory.
+#
+# RECOMMENDATION: don't put the figure/plot/table number in the names of
+# your to-be-published datasets! Give them a descriptive/short name that
+# would be clear to anyone who has read the paper. Later, in the caption
+# (or paper's tex/appendix), you will put links to the dataset on servers
+# like Zenodo (see the "Publication checklist" in 'README-hacking.md').
+tex-publish-dir = $(texdir)/to-publish +data-publish-dir = $(BDIR)/data-to-publish +$(tex-publish-dir):; mkdir $@ +$(data-publish-dir):; mkdir $@ + + + + + +# Print Copyright statement +# ------------------------- +# +# This statement can be used in published datasets that are in plain-text +# format. It assumes you have already put the data-specific statements in +# its first argument, it will supplement them with general project links. +print-copyright = \ + echo "\# Project title: $(metadata-title)" >> $(1); \ + echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \ + echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ + if [ x$(metadata-arxiv) != x ]; then \ + echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \ + if [ x$(metadata-doi-journal) != x ]; then \ + echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ + if [ x$(metadata-doi-zenodo) != x ]; then \ + echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \ + echo "\#" >> $(1); \ + echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \ + echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \ + echo "\# License URL: $(metadata-copyright-url)" >> $(1); @@ -377,7 +465,6 @@ dist-zip: $(packagecontents) # actually exists, it is also aded as a `.PHONY' target above. $(mtexdir)/initialize.tex: | $(mtexdir) - # Version of the project. - @if [ -d .git ]; then v=$$(git describe --dirty --always --long); - else v=NO-GIT; fi - echo "\newcommand{\projectversion}{$$v}" > $@ + # Version and title of project. 
+ echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ + echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 43d1472..67b3fea 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -40,22 +40,34 @@ verify-print-tips = \ echo "the following project source file:"; \ echo " reproduce/analysis/make/verify.mk" -verify-txt-no-comments-leading-space = \ +# Removes following components of a plain-text file, calculates checksum +# and compares with given checksum: +# - All commented lines (starting with '#') are removed. +# - All empty lines are removed. +# - All space-characters in remaining lines are removed (so the width of +# the printed columns won't invalidate the verification). +# +# It takes three arguments: +# - First argument: Full address of file to check. +# - Second argument: Expected checksum of the file to check. +# - File name to write result. +verify-txt-no-comments-no-space = \ infile=$(strip $(1)); \ inchecksum=$(strip $(2)); \ + innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \ if ! [ -f "$$infile" ]; then \ $(call verify-print-error-start); \ echo "The following file (that should be verified) doesn't exist:"; \ echo " $$infile"; \ echo; exit 1; \ fi; \ - checksum=$$(sed -e 's/^[[:space:]]*//g' \ + checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \ -e 's/\#.*$$//' \ -e '/^$$/d' $$infile \ - | md5sum \ - | awk '{print $$1}'); \ + | md5sum \ + | awk '{print $$1}'); \ if [ x"$$inchecksum" = x"$$checksum" ]; then \ - echo "Verified: $$infile"; \ + echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \ else \ $(call verify-print-error-start); \ $(call verify-print-tips); \ @@ -105,11 +117,20 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Make sure that verification is actually requested. 
if [ x"$(verify-outputs)" = xyes ]; then + # Make sure the temporary output doesn't exist (because we want + # to append to it). We are making a temporary output target so if + # there is a crash in the middle, Make will not continue. If we + # write in the final target progressively, the file will exist, + # and its date will be more recent than all prerequisites, so + # next time the project is run, Make will continue and ignore the + # rest of the checks. + rm -f $@.tmp + # Verify the figure datasets. - $(call verify-txt-no-comments-leading-space, \ - $(delete-num), ad345e873e6af577f0e4e7c8942cdf08) - $(call verify-txt-no-comments-leading-space, \ - $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8) + $(call verify-txt-no-comments-no-space, \ + $(dm-squared), 6b6d3b0f9c351de53606507b59bca5d1, $@.tmp) + $(call verify-txt-no-comments-no-space, \ + $(dm-img-histogram), b1f9c413f915a1ad96078fee8767b16c, $@.tmp) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do @@ -118,9 +139,11 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 else echo; echo "'$$m' not recognized."; exit 1 fi - $(call verify-txt-no-comments-leading-space, $$file, $$s) + $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp) done - fi - # Make an empty final target. - touch $@ + # Move temporary file to final target. + mv $@.tmp $@ + else + echo "% Verification was DISABLED!" > $@ + fi |