diff options
Diffstat (limited to 'reproduce')
-rw-r--r-- | reproduce/analysis/config/metadata-common.conf | 16 | ||||
-rw-r--r-- | reproduce/analysis/config/metadata.conf | 25 | ||||
-rw-r--r-- | reproduce/analysis/make/demo-plot.mk | 2 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 115 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 41 | ||||
-rwxr-xr-x | reproduce/software/shell/configure.sh | 15 |
6 files changed, 160 insertions, 54 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf deleted file mode 100644 index 7bc9fa5..0000000 --- a/reproduce/analysis/config/metadata-common.conf +++ /dev/null @@ -1,16 +0,0 @@ -# Metadata parameters that can be used in - -# Project information -metadata-title = Towards Long-term and Archivable Reproducibility - -# DOIs and identifiers. -metadata-arxiv = -metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248 -metadata-doi-journal = -metadata-doi = $(metadata-doi-zenodo) -metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper - -# Copyright and identifier. -metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org> -metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA) -metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0 diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf new file mode 100644 index 0000000..cddc33f --- /dev/null +++ b/reproduce/analysis/config/metadata.conf @@ -0,0 +1,25 @@ +# Project meta-data that can be used in a project's output datasets and +# final paper. Please set the values here and use them in your analysis or +# paper, don't repeat them +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice and +# this notice are preserved. This file is offered as-is, without any +# warranty. + +# Project information +metadata-title = Towards Long-term and Archivable Reproducibility + +# DOIs and identifiers. +metadata-arxiv = 2006.03018 +metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248 +metadata-doi-journal = +metadata-doi = $(metadata-doi-zenodo) +metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper + +# DATA Copyright owner and license information. +metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org> +metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA) +metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0 diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk index a149040..5ddb3d7 100644 --- a/reproduce/analysis/make/demo-plot.mk +++ b/reproduce/analysis/make/demo-plot.mk @@ -79,7 +79,7 @@ $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf echo "\newcommand{\menkefirstyear}{$$v}" > $@ # Find the number of rows in the plotted table. - v=$$(cat $(a2mk20f1c) | wc -l) + v=$$(awk '!/^#/{c++} END{print c}' $(a2mk20f1c)) echo "\newcommand{\menkenumyears}{$$v}" >> $@ # Find the number of papers in 1996. diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 450b673..489f9e3 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -203,6 +203,16 @@ $(lockdir): | $(BDIR); mkdir $@ +# Version and distribution tarball definitions +project-commit-hash := $(shell if [ -d .git ]; then \ + echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +project-package-name := maneaged-$(project-commit-hash) +project-package-contents = $(texdir)/$(project-package-name) + + + + + # High-level Makefile management # ------------------------------ # @@ -213,12 +223,8 @@ $(lockdir): | $(BDIR); mkdir $@ # we want to ensure that the file is always built in every run: it contains # the project version which may change between two separate runs, even when # no file actually differs. -project-commit-hash := $(shell if [ -d .git ]; then \ - echo $$(git describe --dirty --always --long); else echo NOGIT; fi) -packagebasename := paper-$(project-commit-hash) -packagecontents = $(texdir)/$(packagebasename) -.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \ - $(mtexdir)/initialize.tex +.PHONY: all clean dist dist-zip dist-lzip distclean clean-mmap \ + $(project-package-contents) $(mtexdir)/initialize.tex # --------- Delete for no Gnuastro --------- clean-mmap:; rm -f reproduce/config/gnuastro/mmap* @@ -262,11 +268,11 @@ distclean: clean # that is ready for building the final PDF with LaTeX. This is useful for # collaborators who only want to contribute to the text of your project, # without having to worry about the technicalities of the analysis. -$(packagecontents): paper.pdf | $(texdir) +$(project-package-contents): paper.pdf | $(texdir) # Set up the output directory, delete it if it exists and remake it # to fill with new contents. - dir=$(texdir)/$(packagebasename) + dir=$@ rm -rf $$dir mkdir $$dir @@ -304,10 +310,7 @@ $(packagecontents): paper.pdf | $(texdir) cp -r tex/img $$dir/tex/img cp tex/tikz/*.eps $$dir/tex/tikz cp -r reproduce/* $$dir/reproduce - for d in $$(find tex/build/ -mindepth 1 -maxdepth 1 -type d \ - ! -name $(packagebasename)); do - cp -r $$d $$dir/tex/build - done + cp -r tex/build/!($(project-package-name)) $$dir/tex/build # Clean up un-necessary/local files: 1) the $(texdir)/build* # directories (when building in a group structure, there will be @@ -346,32 +349,88 @@ $(packagecontents): paper.pdf | $(texdir) # Clean temporary (currently those ending in `~') files. cd $(texdir) - find $(packagebasename) -name \*~ -delete - find $(packagebasename) -name \*.swp -delete + find $(project-package-name) -name \*~ -delete + find $(project-package-name) -name \*.swp -delete # PROJECT SPECIFIC # ---------------- # Put any project specific distribution steps here. # ---------------- -# Package into `.tar.gz'. -dist: $(packagecontents) +# Package into `.tar.gz' or '.tar.lz'. +dist dist-lzip: $(project-package-contents) curdir=$$(pwd) cd $(texdir) - tar -cf $(packagebasename).tar $(packagebasename) - gzip -f --best $(packagebasename).tar - rm -rf $(packagebasename) + tar -cf $(project-package-name).tar $(project-package-name) + if [ $@ = dist ]; then + suffix=gz + gzip -f --best $(project-package-name).tar + elif [ $@ = dist-lzip ]; then + suffix=lz + lzip -f --best $(project-package-name).tar + fi + rm -rf $(project-package-name) cd $$curdir - mv $(texdir)/$(packagebasename).tar.gz ./ + mv $(texdir)/$(project-package-name).tar.$$suffix ./ # Package into `.zip'. -dist-zip: $(packagecontents) +dist-zip: $(project-package-contents) curdir=$$(pwd) cd $(texdir) - zip -q -r $(packagebasename).zip $(packagebasename) - rm -rf $(packagebasename) + zip -q -r $(project-package-name).zip $(project-package-name) + rm -rf $(project-package-name) + cd $$curdir + mv $(texdir)/$(project-package-name).zip ./ + +# Package the software tarballs. +dist-software: + curdir=$$(pwd) + cd $(BDIR) + if [ -d .git ]; then + dirname="software-$$(git describe --dirty --always --long)" + else + dirname="software-NOGIT"; + fi + mkdir $$dirname + cp -L software/tarballs/* $$dirname/ + tar -cf $$dirname.tar $$dirname + gzip -f --best $$dirname.tar + rm -rf $$dirname cd $$curdir - mv $(texdir)/$(packagebasename).zip ./ + mv $(BDIR)/$$dir.tar.gz ./ + + + + + +# Directory containing to-be-published datasets +# --------------------------------------------- +# +# Its good practice (so you don't forget in the last moment!) to have all +# the plot/figure/table data that you ultimately want to publish in a +# single directory. +# +# There are two types of to-publish data in the project. +# +# 1. Those data that also go into LaTeX (for example to give to LateX's +# PGFPlots package to create the plot internally) should be under the +# '$(BDIR)/tex' directory (because other LaTeX producers may also need +# it for example when using './project make dist'). The contents of +# this directory are directly taken into the tarball. +# +# 2. The data that aren't included directly in the LaTeX run of the paper, +# can be seen as supplements. A good place to keep them is under your +# build-directory. +# +# RECOMMENDATION: don't put the figure/plot/table number in the names of +# your to-be-published datasets! Given them a descriptive/short name that +# would be clear to anyone who has read the paper. Later, in the caption +# (or paper's tex/appendix), you will put links to the dataset on servers +# like Zenodo (see the "Publication checklist" in 'README-hacking.md'). +tex-publish-dir = $(texdir)/to-publish +data-publish-dir = $(BDIR)/data-to-publish +$(tex-publish-dir):; mkdir $@ +$(data-publish-dir):; mkdir $@ @@ -385,9 +444,10 @@ dist-zip: $(packagecontents) # its first argument, it will supplement them with general project links. print-copyright = \ echo "\# Project title: $(metadata-title)" >> $(1); \ - echo "\# Git commit (that produced this dataset): $(packagebasename)" >> $(1); \ + echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \ echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ - if [ x$(metadata-arxiv) != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \ + if [ x$(metadata-arxiv) != x ]; then \ + echo "\# Pre-print server: https://arxiv.org/abs/$(metadata-arxiv)" >> $(1); fi; \ if [ x$(metadata-doi-journal) != x ]; then \ echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ if [ x$(metadata-doi-zenodo) != x ]; then \ @@ -401,7 +461,6 @@ print-copyright = \ - # Project initialization results # ------------------------------ # @@ -410,5 +469,7 @@ print-copyright = \ # calculated everytime the project is run. So even though this file # actually exists, it is also aded as a `.PHONY' target above. $(mtexdir)/initialize.tex: | $(mtexdir) + + # Version and title of project. echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index fb8afc0..dd224d6 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -40,22 +40,34 @@ verify-print-tips = \ echo "the following project source file:"; \ echo " reproduce/analysis/make/verify.mk" -verify-txt-no-comments-leading-space = \ +# Removes following components of a plain-text file, calculates checksum +# and compares with given checksum: +# - All commented lines (starting with '#') are removed. +# - All empty lines are removed. +# - All space-characters in remaining lines are removed (so the width of +# the printed columns won't invalidate the verification). +# +# It takes three arguments: +# - First argument: Full address of file to check. +# - Second argument: Expected checksum of the file to check. +# - File name to write result. +verify-txt-no-comments-no-space = \ infile=$(strip $(1)); \ inchecksum=$(strip $(2)); \ + innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \ if ! [ -f "$$infile" ]; then \ $(call verify-print-error-start); \ echo "The following file (that should be verified) doesn't exist:"; \ echo " $$infile"; \ echo; exit 1; \ fi; \ - checksum=$$(sed -e 's/^[[:space:]]*//g' \ + checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \ -e 's/\#.*$$//' \ -e '/^$$/d' $$infile \ - | md5sum \ - | awk '{print $$1}'); \ + | md5sum \ + | awk '{print $$1}'); \ if [ x"$$inchecksum" = x"$$checksum" ]; then \ - echo "Verified: $$infile"; \ + echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \ else \ $(call verify-print-error-start); \ $(call verify-print-tips); \ @@ -105,6 +117,15 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Make sure that verification is actually requested. if [ x"$(verify-outputs)" = xyes ]; then + # Make sure the temporary output doesn't exist (because we want + # to append to it). We are making a temporary output target so if + # there is a crash in the middle, Make will not continue. If we + # write in the final target progressively, the file will exist, + # and its date will be more recent than all prerequisites, so + # next time the project is run, Make will continue and ignore the + # rest of the checks. + rm -f $@.tmp + # Verify the figure datasets. $(call verify-txt-no-comments-leading-space, \ $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69) @@ -114,14 +135,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) file=$(mtexdir)/$$m.tex if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3 elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f - elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251 + elif [ $$m == demo-plot ]; then s=48bffe6cf8db790c63a33302d20db77f else echo; echo "'$$m' not recognized."; exit 1 fi - $(call verify-txt-no-comments-leading-space, $$file, $$s) + $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp) done - # Make an empty final target. - echo "%% Project outputs are verified." > $@ + # Move temporary file to final target. + mv $@.tmp $@ else - echo "%% Project outputs NOT VERIFIED!!!" > $@ + echo "% Verification was DISABLED!" > $@ fi diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index 1c7e60d..3b3c38f 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -1058,6 +1058,21 @@ fi +# If 'tex/build' and 'tex/tikz' are symbolic links then 'rm -f' will delete +# them and we can continue. However, when the project is being built from +# the tarball, these two are not symbolic links but actual directories with +# the necessary built-components to build the PDF in them. In this case, +# because 'tex/build' is a directory, 'rm -f' will fail, so we'll just +# rename the two directories (as backup) and let the project build the +# proper symbolic links afterwards. +if rm -f tex/build; then + rm -f tex/tikz +else + mv tex/tikz tex/tikz-from-tarball + mv tex/build tex/build-from-tarball +fi + + # Set the symbolic links for easy access to the top project build # directories. Note that these are put in each user's source/cloned # directory, not in the build directory (which can be shared between many |