aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2020-06-09 02:36:34 +0100
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2020-06-09 03:39:46 +0100
commit4646fa400796ac6b46e07429c43537475d953dff (patch)
tree2509a15e6d084940e0442dc6be7826412662588c /reproduce/analysis
parent77aa9c1ebc26ae88b37f3127a73fd0c8973378d3 (diff)
parent623ae15c95bb8575b111709705c29b10fcf7c12b (diff)
Imported Maneage, minor conflicts fixed, a bug found and fixed
Some minor conflicts came up in 'initialize.mk' and 'verify.mk'. For the former, I chose the version on Maneage, for the latter, I kept the 'master' version on the checksums of this project, but kept the Maneage version for the rest of the improvements there (like printing the verified files as LaTeX comments in 'verify.tex'. While testing the conflicts, I noticed a bug (in the LaTeX macro for the number of years in the Menke+20 paper) in the previous build, thanks to the verification step :-)! Fortunately it wasn't actually printed in the PDF, so a normal reader won't recognize. The bug was caused by the recently added meta-data/commented lines in the 'tools-per-year.txt' file: when calculating the number of years studied in that paper, we were simply counting all the lines and we had forgot to correct this after adding comments. As a result, the un-used LaTeX macro file was saying that they have studied 47 years instead of the real 31 years! This element was actually used in the very first (+40 page!) draft of the paper that was summarized to fit into the journal limits.
Diffstat (limited to 'reproduce/analysis')
-rw-r--r--reproduce/analysis/config/metadata-common.conf16
-rw-r--r--reproduce/analysis/config/metadata.conf25
-rw-r--r--reproduce/analysis/make/demo-plot.mk2
-rw-r--r--reproduce/analysis/make/initialize.mk115
-rw-r--r--reproduce/analysis/make/verify.mk41
5 files changed, 145 insertions, 54 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf
deleted file mode 100644
index 7bc9fa5..0000000
--- a/reproduce/analysis/config/metadata-common.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-# Metadata parameters that can be used in
-
-# Project information
-metadata-title = Towards Long-term and Archivable Reproducibility
-
-# DOIs and identifiers.
-metadata-arxiv =
-metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
-metadata-doi-journal =
-metadata-doi = $(metadata-doi-zenodo)
-metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
-
-# Copyright and identifier.
-metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
-metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
-metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf
new file mode 100644
index 0000000..cddc33f
--- /dev/null
+++ b/reproduce/analysis/config/metadata.conf
@@ -0,0 +1,25 @@
+# Project meta-data that can be used in a project's output datasets and
+# final paper. Please set the values here and use them in your analysis or
+# paper, don't repeat them
+#
+# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice and
+# this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+# Project information
+metadata-title = Towards Long-term and Archivable Reproducibility
+
+# DOIs and identifiers.
+metadata-arxiv = 2006.03018
+metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
+metadata-doi-journal =
+metadata-doi = $(metadata-doi-zenodo)
+metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
+
+# DATA Copyright owner and license information.
+metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
+metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
+metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk
index a149040..5ddb3d7 100644
--- a/reproduce/analysis/make/demo-plot.mk
+++ b/reproduce/analysis/make/demo-plot.mk
@@ -79,7 +79,7 @@ $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf
echo "\newcommand{\menkefirstyear}{$$v}" > $@
# Find the number of rows in the plotted table.
- v=$$(cat $(a2mk20f1c) | wc -l)
+ v=$$(awk '!/^#/{c++} END{print c}' $(a2mk20f1c))
echo "\newcommand{\menkenumyears}{$$v}" >> $@
# Find the number of papers in 1996.
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 450b673..489f9e3 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -203,6 +203,16 @@ $(lockdir): | $(BDIR); mkdir $@
+# Version and distribution tarball definitions
+project-commit-hash := $(shell if [ -d .git ]; then \
+ echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
+project-package-name := maneaged-$(project-commit-hash)
+project-package-contents = $(texdir)/$(project-package-name)
+
+
+
+
+
# High-level Makefile management
# ------------------------------
#
@@ -213,12 +223,8 @@ $(lockdir): | $(BDIR); mkdir $@
# we want to ensure that the file is always built in every run: it contains
# the project version which may change between two separate runs, even when
# no file actually differs.
-project-commit-hash := $(shell if [ -d .git ]; then \
- echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
-packagebasename := paper-$(project-commit-hash)
-packagecontents = $(texdir)/$(packagebasename)
-.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
- $(mtexdir)/initialize.tex
+.PHONY: all clean dist dist-zip dist-lzip distclean clean-mmap \
+ $(project-package-contents) $(mtexdir)/initialize.tex
# --------- Delete for no Gnuastro ---------
clean-mmap:; rm -f reproduce/config/gnuastro/mmap*
@@ -262,11 +268,11 @@ distclean: clean
# that is ready for building the final PDF with LaTeX. This is useful for
# collaborators who only want to contribute to the text of your project,
# without having to worry about the technicalities of the analysis.
-$(packagecontents): paper.pdf | $(texdir)
+$(project-package-contents): paper.pdf | $(texdir)
# Set up the output directory, delete it if it exists and remake it
# to fill with new contents.
- dir=$(texdir)/$(packagebasename)
+ dir=$@
rm -rf $$dir
mkdir $$dir
@@ -304,10 +310,7 @@ $(packagecontents): paper.pdf | $(texdir)
cp -r tex/img $$dir/tex/img
cp tex/tikz/*.eps $$dir/tex/tikz
cp -r reproduce/* $$dir/reproduce
- for d in $$(find tex/build/ -mindepth 1 -maxdepth 1 -type d \
- ! -name $(packagebasename)); do
- cp -r $$d $$dir/tex/build
- done
+ cp -r tex/build/!($(project-package-name)) $$dir/tex/build
# Clean up un-necessary/local files: 1) the $(texdir)/build*
# directories (when building in a group structure, there will be
@@ -346,32 +349,88 @@ $(packagecontents): paper.pdf | $(texdir)
# Clean temporary (currently those ending in `~') files.
cd $(texdir)
- find $(packagebasename) -name \*~ -delete
- find $(packagebasename) -name \*.swp -delete
+ find $(project-package-name) -name \*~ -delete
+ find $(project-package-name) -name \*.swp -delete
# PROJECT SPECIFIC
# ----------------
# Put any project specific distribution steps here.
# ----------------
-# Package into `.tar.gz'.
-dist: $(packagecontents)
+# Package into `.tar.gz' or '.tar.lz'.
+dist dist-lzip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- tar -cf $(packagebasename).tar $(packagebasename)
- gzip -f --best $(packagebasename).tar
- rm -rf $(packagebasename)
+ tar -cf $(project-package-name).tar $(project-package-name)
+ if [ $@ = dist ]; then
+ suffix=gz
+ gzip -f --best $(project-package-name).tar
+ elif [ $@ = dist-lzip ]; then
+ suffix=lz
+ lzip -f --best $(project-package-name).tar
+ fi
+ rm -rf $(project-package-name)
cd $$curdir
- mv $(texdir)/$(packagebasename).tar.gz ./
+ mv $(texdir)/$(project-package-name).tar.$$suffix ./
# Package into `.zip'.
-dist-zip: $(packagecontents)
+dist-zip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- zip -q -r $(packagebasename).zip $(packagebasename)
- rm -rf $(packagebasename)
+ zip -q -r $(project-package-name).zip $(project-package-name)
+ rm -rf $(project-package-name)
+ cd $$curdir
+ mv $(texdir)/$(project-package-name).zip ./
+
+# Package the software tarballs.
+dist-software:
+ curdir=$$(pwd)
+ cd $(BDIR)
+ if [ -d .git ]; then
+ dirname="software-$$(git describe --dirty --always --long)"
+ else
+ dirname="software-NOGIT";
+ fi
+ mkdir $$dirname
+ cp -L software/tarballs/* $$dirname/
+ tar -cf $$dirname.tar $$dirname
+ gzip -f --best $$dirname.tar
+ rm -rf $$dirname
cd $$curdir
- mv $(texdir)/$(packagebasename).zip ./
+ mv $(BDIR)/$$dir.tar.gz ./
+
+
+
+
+
+# Directory containing to-be-published datasets
+# ---------------------------------------------
+#
+# Its good practice (so you don't forget in the last moment!) to have all
+# the plot/figure/table data that you ultimately want to publish in a
+# single directory.
+#
+# There are two types of to-publish data in the project.
+#
+# 1. Those data that also go into LaTeX (for example to give to LateX's
+# PGFPlots package to create the plot internally) should be under the
+# '$(BDIR)/tex' directory (because other LaTeX producers may also need
+# it for example when using './project make dist'). The contents of
+# this directory are directly taken into the tarball.
+#
+# 2. The data that aren't included directly in the LaTeX run of the paper,
+# can be seen as supplements. A good place to keep them is under your
+# build-directory.
+#
+# RECOMMENDATION: don't put the figure/plot/table number in the names of
+# your to-be-published datasets! Given them a descriptive/short name that
+# would be clear to anyone who has read the paper. Later, in the caption
+# (or paper's tex/appendix), you will put links to the dataset on servers
+# like Zenodo (see the "Publication checklist" in 'README-hacking.md').
+tex-publish-dir = $(texdir)/to-publish
+data-publish-dir = $(BDIR)/data-to-publish
+$(tex-publish-dir):; mkdir $@
+$(data-publish-dir):; mkdir $@
@@ -385,9 +444,10 @@ dist-zip: $(packagecontents)
# its first argument, it will supplement them with general project links.
print-copyright = \
echo "\# Project title: $(metadata-title)" >> $(1); \
- echo "\# Git commit (that produced this dataset): $(packagebasename)" >> $(1); \
+ echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
- if [ x$(metadata-arxiv) != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \
+ if [ x$(metadata-arxiv) != x ]; then \
+ echo "\# Pre-print server: https://arxiv.org/abs/$(metadata-arxiv)" >> $(1); fi; \
if [ x$(metadata-doi-journal) != x ]; then \
echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
if [ x$(metadata-doi-zenodo) != x ]; then \
@@ -401,7 +461,6 @@ print-copyright = \
-
# Project initialization results
# ------------------------------
#
@@ -410,5 +469,7 @@ print-copyright = \
# calculated everytime the project is run. So even though this file
# actually exists, it is also aded as a `.PHONY' target above.
$(mtexdir)/initialize.tex: | $(mtexdir)
+
+ # Version and title of project.
echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index fb8afc0..dd224d6 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -40,22 +40,34 @@ verify-print-tips = \
echo "the following project source file:"; \
echo " reproduce/analysis/make/verify.mk"
-verify-txt-no-comments-leading-space = \
+# Removes following components of a plain-text file, calculates checksum
+# and compares with given checksum:
+# - All commented lines (starting with '#') are removed.
+# - All empty lines are removed.
+# - All space-characters in remaining lines are removed (so the width of
+# the printed columns won't invalidate the verification).
+#
+# It takes three arguments:
+# - First argument: Full address of file to check.
+# - Second argument: Expected checksum of the file to check.
+# - File name to write result.
+verify-txt-no-comments-no-space = \
infile=$(strip $(1)); \
inchecksum=$(strip $(2)); \
+ innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \
if ! [ -f "$$infile" ]; then \
$(call verify-print-error-start); \
echo "The following file (that should be verified) doesn't exist:"; \
echo " $$infile"; \
echo; exit 1; \
fi; \
- checksum=$$(sed -e 's/^[[:space:]]*//g' \
+ checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \
-e 's/\#.*$$//' \
-e '/^$$/d' $$infile \
- | md5sum \
- | awk '{print $$1}'); \
+ | md5sum \
+ | awk '{print $$1}'); \
if [ x"$$inchecksum" = x"$$checksum" ]; then \
- echo "Verified: $$infile"; \
+ echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \
else \
$(call verify-print-error-start); \
$(call verify-print-tips); \
@@ -105,6 +117,15 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Make sure that verification is actually requested.
if [ x"$(verify-outputs)" = xyes ]; then
+ # Make sure the temporary output doesn't exist (because we want
+ # to append to it). We are making a temporary output target so if
+ # there is a crash in the middle, Make will not continue. If we
+ # write in the final target progressively, the file will exist,
+ # and its date will be more recent than all prerequisites, so
+ # next time the project is run, Make will continue and ignore the
+ # rest of the checks.
+ rm -f $@.tmp
+
# Verify the figure datasets.
$(call verify-txt-no-comments-leading-space, \
$(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69)
@@ -114,14 +135,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
file=$(mtexdir)/$$m.tex
if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3
elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f
- elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251
+ elif [ $$m == demo-plot ]; then s=48bffe6cf8db790c63a33302d20db77f
else echo; echo "'$$m' not recognized."; exit 1
fi
- $(call verify-txt-no-comments-leading-space, $$file, $$s)
+ $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)
done
- # Make an empty final target.
- echo "%% Project outputs are verified." > $@
+ # Move temporary file to final target.
+ mv $@.tmp $@
else
- echo "%% Project outputs NOT VERIFIED!!!" > $@
+ echo "% Verification was DISABLED!" > $@
fi