aboutsummaryrefslogtreecommitdiff
path: root/reproduce
diff options
context:
space:
mode:
authorMohammad Akhlaghi <mohammad@akhlaghi.org>2020-06-04 04:09:21 +0100
committerMohammad Akhlaghi <mohammad@akhlaghi.org>2020-06-04 04:09:21 +0100
commite3bdc607a7fca8ebd876e1fa6002e679ad32f2c4 (patch)
treebac9de2e3f4f13db09ece9228674a1d546b7f62f /reproduce
parentd85dfdf8d7b0f2769d824fd4994eccec55db963a (diff)
Verification activated, README added, Proper metadata in plot data
All the steps following the to-be-added (in 'README-hacking.md') publication checklist prior to the final check from a new clone have been added: - 'README.md' file has been set. - "Reproducible supplement" was added just above the keywords, pointing to Zenodo. - A link to the to-be-uploaded data underlying the plot was added in the caption of the tools-per-year plot. - A new meta-data configuration file was added to store basic project metadata to be used throughout the project. This will later be taken into Maneage. For example, the project title is now stored here and written into the paper's LaTeX source and output datasets automatically. - Verification was activated and the plot's data and LaTeX macro files are now automatically verified. - Complete metadata was added for the data underlying the plot. - A generic function was added in 'initialize.mk' that will automatically write project info and copyright in all plain-text outputs.
Diffstat (limited to 'reproduce')
-rw-r--r--reproduce/analysis/config/metadata-common.conf16
-rw-r--r--reproduce/analysis/config/verify-outputs.conf11
-rw-r--r--reproduce/analysis/make/demo-plot.mk35
-rw-r--r--reproduce/analysis/make/initialize.mk37
-rw-r--r--reproduce/analysis/make/verify.mk8
5 files changed, 89 insertions, 18 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf
new file mode 100644
index 0000000..7bc9fa5
--- /dev/null
+++ b/reproduce/analysis/config/metadata-common.conf
@@ -0,0 +1,16 @@
+# Metadata parameters that can be used throughout the project.
+
+# Project information
+metadata-title = Towards Long-term and Archivable Reproducibility
+
+# DOIs and identifiers.
+metadata-arxiv =
+metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
+metadata-doi-journal =
+metadata-doi = $(metadata-doi-zenodo)
+metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
+
+# Copyright owner and license of the published datasets.
+metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
+metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
+metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/config/verify-outputs.conf b/reproduce/analysis/config/verify-outputs.conf
index e4ef479..c9287e8 100644
--- a/reproduce/analysis/config/verify-outputs.conf
+++ b/reproduce/analysis/config/verify-outputs.conf
@@ -1,2 +1,9 @@
-# To enable verification of output datasets set this variable to yes
-verify-outputs =
+# To enable verification of output datasets, set this variable to 'yes'.
+#
+# Copyright (C) 2019-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice and
+# this notice are preserved. This file is offered as-is, without any
+# warranty.
+verify-outputs = yes
diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk
index c14b83d..a149040 100644
--- a/reproduce/analysis/make/demo-plot.mk
+++ b/reproduce/analysis/make/demo-plot.mk
@@ -18,7 +18,7 @@
# Directory to host outputs
# -------------------------
-a2dir = $(texdir)/tools-per-year
+a2dir = $(texdir)/to-publish
$(a2dir):; mkdir $@
@@ -27,7 +27,7 @@ $(a2dir):; mkdir $@
# Table for Figure 1C of Menke+20
# -------------------------------
-a2mk20f1c = $(a2dir)/columns.txt
+a2mk20f1c = $(a2dir)/tools-per-year.txt
$(a2mk20f1c): $(mk20tab3) | $(a2dir)
# Remove the (possibly) produced figure that is created from this
@@ -35,12 +35,37 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir)
# multiple files with a fixed prefix.
rm -f $(tikzdir)/figure-tools-per-year*
+ # Write the column metadata into a temporary file (the target name
+ # with '.tmp' appended), which is renamed to the final target only
+ # after all steps are done. If there is an error in the middle, the
+ # target itself is never created, so Make will not consider the job
+ # complete. Each 'echo' below carries exactly one redirection.
+ echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp
+ echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." \
+ >> $@.tmp
+ echo "# Raw data taken from Menke+2020 (https://doi.org/10.1101/2020.01.15.908111)." \
+ >> $@.tmp
+ echo "# " >> $@.tmp
+ echo "# Column 1: YEAR [count, u16] Publication year of papers." \
+ >> $@.tmp
+ echo "# Column 2: WITH_TOOLS [frac, f32] Fraction of papers mentioning software tools." \
+ >> $@.tmp
+ echo "# Column 3: NUM_PAPERS [count, u32] Total number of papers studied in that year." \
+ >> $@.tmp
+ echo "# " >> $@.tmp
+ $(call print-copyright, $@.tmp)
+
+
# Find the maximum number of papers.
awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \
END{ for(year in all) \
- print year, 100*id[year]/all[year], all[year] \
+ printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \
+ all[year]) \
}' $< \
- > $@
+ >> $@.tmp
+
+ # Write it into the final target
+ mv $@.tmp $@
@@ -50,7 +75,7 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir)
$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf
# Find the first year (first column of first row) of data.
- v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c))
+ v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c))
echo "\newcommand{\menkefirstyear}{$$v}" > $@
# Find the number of rows in the plotted table.
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index fe9c103..b0701f4 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -213,8 +213,9 @@ $(lockdir): | $(BDIR); mkdir $@
# we want to ensure that the file is always built in every run: it contains
# the project version which may change between two separate runs, even when
# no file actually differs.
-packagebasename := $(shell if [ -d .git ]; then \
- echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi)
+project-commit-hash := $(shell if [ -d .git ]; then \
+ git describe --dirty --always --long; else echo NOGIT; fi)
+packagebasename := paper-$(project-commit-hash)
packagecontents = $(texdir)/$(packagebasename)
.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
$(mtexdir)/initialize.tex
@@ -373,6 +374,31 @@ dist-zip: $(packagecontents)
+# Print Copyright statement
+# -------------------------
+#
+# Append general project metadata and the copyright notice to the plain-text
+# dataset named in the first argument (data-specific comments go in first).
+# The commit line uses 'project-commit-hash', without the 'paper-' prefix.
+print-copyright = \
+ echo "\# Project title: $(metadata-title)" >> $(1); \
+ echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
+ echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
+ if [ "x$(metadata-arxiv)" != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \
+ if [ "x$(metadata-doi-journal)" != x ]; then \
+ echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
+ if [ "x$(metadata-doi-zenodo)" != x ]; then \
+ echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \
+ echo "\#" >> $(1); \
+ echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \
+ echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \
+ echo "\# License URL: $(metadata-copyright-url)" >> $(1);
+
+
+
+
+
+
# Project initialization results
# ------------------------------
#
@@ -381,8 +407,5 @@ dist-zip: $(packagecontents)
# calculated every time the project is run. So even though this file
# actually exists, it is also added as a `.PHONY' target above.
$(mtexdir)/initialize.tex: | $(mtexdir)
-
- # Version of the project.
- @if [ -d .git ]; then v=$$(git describe --dirty --always --long);
- else v=NO-GIT; fi
- echo "\newcommand{\projectversion}{$$v}" > $@
+ echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
+ echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index 088b3b3..1573920 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -107,14 +107,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Verify the figure datasets.
$(call verify-txt-no-comments-leading-space, \
- $(delete-num), ad345e873e6af577f0e4e7c8942cdf08)
- $(call verify-txt-no-comments-leading-space, \
- $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8)
+ $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69)
# Verify TeX macros (the values that go into the PDF text).
for m in $(verify-check); do
file=$(mtexdir)/$$m.tex
- if [ $$m == download ]; then s=XXXXX
+ if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3
+ elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f
+ elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251
else echo; echo "'$$m' not recognized."; exit 1
fi
$(call verify-txt-no-comments-leading-space, $$file, $$s)