aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/analysis')
-rw-r--r--reproduce/analysis/config/metadata-common.conf16
-rw-r--r--reproduce/analysis/config/metadata.conf25
-rw-r--r--reproduce/analysis/make/demo-plot.mk2
-rw-r--r--reproduce/analysis/make/initialize.mk115
-rw-r--r--reproduce/analysis/make/verify.mk41
5 files changed, 145 insertions, 54 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf
deleted file mode 100644
index 7bc9fa5..0000000
--- a/reproduce/analysis/config/metadata-common.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-# Metadata parameters that can be used in
-
-# Project information
-metadata-title = Towards Long-term and Archivable Reproducibility
-
-# DOIs and identifiers.
-metadata-arxiv =
-metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
-metadata-doi-journal =
-metadata-doi = $(metadata-doi-zenodo)
-metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
-
-# Copyright and identifier.
-metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
-metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
-metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf
new file mode 100644
index 0000000..cddc33f
--- /dev/null
+++ b/reproduce/analysis/config/metadata.conf
@@ -0,0 +1,25 @@
+# Project meta-data that can be used in a project's output datasets and
+# final paper. Please set the values here and use them in your analysis or
+# paper; don't repeat them elsewhere.
+#
+# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice and
+# this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+# Project information
+metadata-title = Towards Long-term and Archivable Reproducibility
+
+# DOIs and identifiers.
+metadata-arxiv = 2006.03018
+metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
+metadata-doi-journal =
+metadata-doi = $(metadata-doi-zenodo)
+metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
+
+# DATA Copyright owner and license information.
+metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
+metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
+metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk
index a149040..5ddb3d7 100644
--- a/reproduce/analysis/make/demo-plot.mk
+++ b/reproduce/analysis/make/demo-plot.mk
@@ -79,7 +79,7 @@ $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf
echo "\newcommand{\menkefirstyear}{$$v}" > $@
# Find the number of rows in the plotted table.
- v=$$(cat $(a2mk20f1c) | wc -l)
+ v=$$(awk '!/^#/{c++} END{print c}' $(a2mk20f1c))
echo "\newcommand{\menkenumyears}{$$v}" >> $@
# Find the number of papers in 1996.
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index 450b673..489f9e3 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -203,6 +203,16 @@ $(lockdir): | $(BDIR); mkdir $@
+# Version and distribution tarball definitions
+project-commit-hash := $(shell if [ -d .git ]; then \
+ echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
+project-package-name := maneaged-$(project-commit-hash)
+project-package-contents = $(texdir)/$(project-package-name)
+
+
+
+
+
# High-level Makefile management
# ------------------------------
#
@@ -213,12 +223,8 @@ $(lockdir): | $(BDIR); mkdir $@
# we want to ensure that the file is always built in every run: it contains
# the project version which may change between two separate runs, even when
# no file actually differs.
-project-commit-hash := $(shell if [ -d .git ]; then \
- echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
-packagebasename := paper-$(project-commit-hash)
-packagecontents = $(texdir)/$(packagebasename)
-.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
- $(mtexdir)/initialize.tex
+.PHONY: all clean dist dist-zip dist-lzip distclean clean-mmap \
+ $(project-package-contents) $(mtexdir)/initialize.tex
# --------- Delete for no Gnuastro ---------
clean-mmap:; rm -f reproduce/config/gnuastro/mmap*
@@ -262,11 +268,11 @@ distclean: clean
# that is ready for building the final PDF with LaTeX. This is useful for
# collaborators who only want to contribute to the text of your project,
# without having to worry about the technicalities of the analysis.
-$(packagecontents): paper.pdf | $(texdir)
+$(project-package-contents): paper.pdf | $(texdir)
# Set up the output directory, delete it if it exists and remake it
# to fill with new contents.
- dir=$(texdir)/$(packagebasename)
+ dir=$@
rm -rf $$dir
mkdir $$dir
@@ -304,10 +310,7 @@ $(packagecontents): paper.pdf | $(texdir)
cp -r tex/img $$dir/tex/img
cp tex/tikz/*.eps $$dir/tex/tikz
cp -r reproduce/* $$dir/reproduce
- for d in $$(find tex/build/ -mindepth 1 -maxdepth 1 -type d \
- ! -name $(packagebasename)); do
- cp -r $$d $$dir/tex/build
- done
+ cp -r tex/build/!($(project-package-name)) $$dir/tex/build
# Clean up un-necessary/local files: 1) the $(texdir)/build*
# directories (when building in a group structure, there will be
@@ -346,32 +349,88 @@ $(packagecontents): paper.pdf | $(texdir)
# Clean temporary (currently those ending in `~') files.
cd $(texdir)
- find $(packagebasename) -name \*~ -delete
- find $(packagebasename) -name \*.swp -delete
+ find $(project-package-name) -name \*~ -delete
+ find $(project-package-name) -name \*.swp -delete
# PROJECT SPECIFIC
# ----------------
# Put any project specific distribution steps here.
# ----------------
-# Package into `.tar.gz'.
-dist: $(packagecontents)
+# Package into `.tar.gz' or '.tar.lz'.
+dist dist-lzip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- tar -cf $(packagebasename).tar $(packagebasename)
- gzip -f --best $(packagebasename).tar
- rm -rf $(packagebasename)
+ tar -cf $(project-package-name).tar $(project-package-name)
+ if [ $@ = dist ]; then
+ suffix=gz
+ gzip -f --best $(project-package-name).tar
+ elif [ $@ = dist-lzip ]; then
+ suffix=lz
+ lzip -f --best $(project-package-name).tar
+ fi
+ rm -rf $(project-package-name)
cd $$curdir
- mv $(texdir)/$(packagebasename).tar.gz ./
+ mv $(texdir)/$(project-package-name).tar.$$suffix ./
# Package into `.zip'.
-dist-zip: $(packagecontents)
+dist-zip: $(project-package-contents)
curdir=$$(pwd)
cd $(texdir)
- zip -q -r $(packagebasename).zip $(packagebasename)
- rm -rf $(packagebasename)
+ zip -q -r $(project-package-name).zip $(project-package-name)
+ rm -rf $(project-package-name)
+ cd $$curdir
+ mv $(texdir)/$(project-package-name).zip ./
+
+# Package the software tarballs.
+#
+# Everything under '$(BDIR)/software/tarballs' is copied (dereferencing
+# symbolic links, because of 'cp -L') into a directory called
+# 'software-COMMIT', which is then packed into 'software-COMMIT.tar.gz'
+# and moved into the directory that Make was called from.
+#
+# COMMIT is taken from 'project-commit-hash' (set at the top of this
+# file, in the directory Make was started in). It must not be
+# re-calculated here: this recipe changes into '$(BDIR)' before building
+# the tarball, so a '.git' test or 'git describe' at that point would be
+# run in the build directory, not in the project's source.
+#
+# NOTE(review): like the other 'dist*' recipes, this assumes all recipe
+# lines run in a single shell (shell variables and 'cd' are used across
+# lines) -- confirm '.ONESHELL' is active for this Makefile.
+.PHONY: dist-software
+dist-software:
+ curdir=$$(pwd)
+ dirname=software-$(project-commit-hash)
+ cd $(BDIR)
+ # Remove any left-over from a previously interrupted run, otherwise
+ # 'mkdir' will fail.
+ rm -rf $$dirname
+ mkdir $$dirname
+ cp -L software/tarballs/* $$dirname/
+ tar -cf $$dirname.tar $$dirname
+ gzip -f --best $$dirname.tar
+ rm -rf $$dirname
cd $$curdir
- mv $(texdir)/$(packagebasename).zip ./
+ mv $(BDIR)/$$dirname.tar.gz ./
+
+
+
+
+
+# Directory containing to-be-published datasets
+# ---------------------------------------------
+#
+# It's good practice (so you don't forget at the last moment!) to have all
+# the plot/figure/table data that you ultimately want to publish in a
+# single directory.
+#
+# There are two types of to-publish data in the project.
+#
+# 1. Those data that also go into LaTeX (for example to give to LaTeX's
+# PGFPlots package to create the plot internally) should be under the
+# '$(BDIR)/tex' directory (because other LaTeX producers may also need
+# it for example when using './project make dist'). The contents of
+# this directory are directly taken into the tarball.
+#
+# 2. The data that aren't included directly in the LaTeX run of the paper,
+# can be seen as supplements. A good place to keep them is under your
+# build-directory.
+#
+# RECOMMENDATION: don't put the figure/plot/table number in the names of
+# your to-be-published datasets! Give them a descriptive/short name that
+# would be clear to anyone who has read the paper. Later, in the caption
+# (or paper's tex/appendix), you will put links to the dataset on servers
+# like Zenodo (see the "Publication checklist" in 'README-hacking.md').
+tex-publish-dir = $(texdir)/to-publish
+data-publish-dir = $(BDIR)/data-to-publish
+$(tex-publish-dir):; mkdir $@
+$(data-publish-dir):; mkdir $@
@@ -385,9 +444,10 @@ dist-zip: $(packagecontents)
# its first argument, it will supplement them with general project links.
print-copyright = \
echo "\# Project title: $(metadata-title)" >> $(1); \
- echo "\# Git commit (that produced this dataset): $(packagebasename)" >> $(1); \
+ echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
- if [ x$(metadata-arxiv) != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \
+ if [ x$(metadata-arxiv) != x ]; then \
+ echo "\# Pre-print server: https://arxiv.org/abs/$(metadata-arxiv)" >> $(1); fi; \
if [ x$(metadata-doi-journal) != x ]; then \
echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
if [ x$(metadata-doi-zenodo) != x ]; then \
@@ -401,7 +461,6 @@ print-copyright = \
-
# Project initialization results
# ------------------------------
#
@@ -410,5 +469,7 @@ print-copyright = \
# calculated every time the project is run. So even though this file
# actually exists, it is also added as a `.PHONY' target above.
$(mtexdir)/initialize.tex: | $(mtexdir)
+
+ # Version and title of project.
echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index fb8afc0..dd224d6 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -40,22 +40,34 @@ verify-print-tips = \
echo "the following project source file:"; \
echo " reproduce/analysis/make/verify.mk"
-verify-txt-no-comments-leading-space = \
+# Removes the following components of a plain-text file, calculates the
+# checksum of what remains and compares it with the given checksum:
+# - All comments (from a '#' until the end of the line) are removed.
+# - All empty lines are removed.
+# - All space-characters in remaining lines are removed (so the width of
+# the printed columns won't invalidate the verification).
+#
+# It takes three arguments:
+# - First argument: Full address of file to check.
+# - Second argument: Expected checksum of the file to check.
+# - Third argument: File name to append the verification result to.
+verify-txt-no-comments-no-space = \
infile=$(strip $(1)); \
inchecksum=$(strip $(2)); \
+ innobdir=$$(echo $$infile | sed -e's|$(BDIR)/||g'); \
if ! [ -f "$$infile" ]; then \
$(call verify-print-error-start); \
echo "The following file (that should be verified) doesn't exist:"; \
echo " $$infile"; \
echo; exit 1; \
fi; \
- checksum=$$(sed -e 's/^[[:space:]]*//g' \
+ checksum=$$(sed -e 's/[[:space:]][[:space:]]*//g' \
-e 's/\#.*$$//' \
-e '/^$$/d' $$infile \
- | md5sum \
- | awk '{print $$1}'); \
+ | md5sum \
+ | awk '{print $$1}'); \
if [ x"$$inchecksum" = x"$$checksum" ]; then \
- echo "Verified: $$infile"; \
+ echo "%% (VERIFIED) $$checksum $$innobdir" >> $(3); \
else \
$(call verify-print-error-start); \
$(call verify-print-tips); \
@@ -105,6 +117,15 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Make sure that verification is actually requested.
if [ x"$(verify-outputs)" = xyes ]; then
+ # Make sure the temporary output doesn't exist (because we want
+ # to append to it). We are making a temporary output target so if
+ # there is a crash in the middle, Make will not continue. If we
+ # write in the final target progressively, the file will exist,
+ # and its date will be more recent than all prerequisites, so
+ # next time the project is run, Make will continue and ignore the
+ # rest of the checks.
+ rm -f $@.tmp
+
# Verify the figure datasets.
$(call verify-txt-no-comments-leading-space, \
$(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69)
@@ -114,14 +135,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
file=$(mtexdir)/$$m.tex
if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3
elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f
- elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251
+ elif [ $$m == demo-plot ]; then s=48bffe6cf8db790c63a33302d20db77f
else echo; echo "'$$m' not recognized."; exit 1
fi
- $(call verify-txt-no-comments-leading-space, $$file, $$s)
+ $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp)
done
- # Make an empty final target.
- echo "%% Project outputs are verified." > $@
+ # Move temporary file to final target.
+ mv $@.tmp $@
else
- echo "%% Project outputs NOT VERIFIED!!!" > $@
+ echo "% Verification was DISABLED!" > $@
fi