aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis
diff options
context:
space:
mode:
Diffstat (limited to 'reproduce/analysis')
-rw-r--r--reproduce/analysis/config/metadata-common.conf16
-rw-r--r--reproduce/analysis/config/verify-outputs.conf11
-rw-r--r--reproduce/analysis/make/demo-plot.mk35
-rw-r--r--reproduce/analysis/make/initialize.mk37
-rw-r--r--reproduce/analysis/make/verify.mk8
5 files changed, 89 insertions, 18 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf
new file mode 100644
index 0000000..7bc9fa5
--- /dev/null
+++ b/reproduce/analysis/config/metadata-common.conf
@@ -0,0 +1,16 @@
+# Metadata parameters that can be used in the project's outputs (datasets, paper).
+
+# Project information
+metadata-title = Towards Long-term and Archivable Reproducibility
+
+# DOIs and identifiers.
+metadata-arxiv =
+metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248
+metadata-doi-journal =
+metadata-doi = $(metadata-doi-zenodo)
+metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper
+
+# Copyright and identifier.
+metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org>
+metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA)
+metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0
diff --git a/reproduce/analysis/config/verify-outputs.conf b/reproduce/analysis/config/verify-outputs.conf
index e4ef479..c9287e8 100644
--- a/reproduce/analysis/config/verify-outputs.conf
+++ b/reproduce/analysis/config/verify-outputs.conf
@@ -1,2 +1,9 @@
-# To enable verification of output datasets set this variable to yes
-verify-outputs =
+# To enable verification of output datasets set this variable to 'yes'.
+#
+# Copyright (C) 2019-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice and
+# this notice are preserved. This file is offered as-is, without any
+# warranty.
+verify-outputs = yes
diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk
index c14b83d..a149040 100644
--- a/reproduce/analysis/make/demo-plot.mk
+++ b/reproduce/analysis/make/demo-plot.mk
@@ -18,7 +18,7 @@
# Directory to host outputs
# -------------------------
-a2dir = $(texdir)/tools-per-year
+a2dir = $(texdir)/to-publish
$(a2dir):; mkdir $@
@@ -27,7 +27,7 @@ $(a2dir):; mkdir $@
# Table for Figure 1C of Menke+20
# -------------------------------
-a2mk20f1c = $(a2dir)/columns.txt
+a2mk20f1c = $(a2dir)/tools-per-year.txt
$(a2mk20f1c): $(mk20tab3) | $(a2dir)
# Remove the (possibly) produced figure that is created from this
@@ -35,12 +35,37 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir)
# multiple files with a fixed prefix.
rm -f $(tikzdir)/figure-tools-per-year*
+ # Write the column metadata in a temporary file name (appending
+ # '.tmp' to the actual target name). Once all steps are done, it is
+ # renamed to the final target. We do this because if there is an
+ # error in the middle, the final target is not written, so Make will
+ # not consider the job to be complete and will stop here.
+ echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp
+ echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." >> $@.tmp
+ >> $@.tmp
+ echo "# Raw data taken from Menke+2020 (https://doi.org/10.1101/2020.01.15.908111)." \
+ >> $@.tmp
+ echo "# " >> $@.tmp
+ echo "# Column 1: YEAR [count, u16] Publication year of papers." \
+ >> $@.tmp
+ echo "# Column 2: WITH_TOOLS [frac, f32] Fraction of papers mentioning software tools." \
+ >> $@.tmp
+ echo "# Column 3: NUM_PAPERS [count, u32] Total number of papers studied in that year." \
+ >> $@.tmp
+ echo "# " >> $@.tmp
+ $(call print-copyright, $@.tmp)
+
+
# For each year, sum the counts and print the year, percentage and total.
awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \
END{ for(year in all) \
- print year, 100*id[year]/all[year], all[year] \
+ printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \
+ all[year]) \
}' $< \
- > $@
+ >> $@.tmp
+
+ # Write it into the final target
+ mv $@.tmp $@
@@ -50,7 +75,7 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir)
$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf
# Find the first year (first column of first row) of data.
- v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c))
+ v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c))
echo "\newcommand{\menkefirstyear}{$$v}" > $@
# Find the number of rows in the plotted table.
diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk
index fe9c103..b0701f4 100644
--- a/reproduce/analysis/make/initialize.mk
+++ b/reproduce/analysis/make/initialize.mk
@@ -213,8 +213,9 @@ $(lockdir): | $(BDIR); mkdir $@
# we want to ensure that the file is always built in every run: it contains
# the project version which may change between two separate runs, even when
# no file actually differs.
-packagebasename := $(shell if [ -d .git ]; then \
- echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi)
+project-commit-hash := $(shell if [ -d .git ]; then \
+ echo $$(git describe --dirty --always --long); else echo NOGIT; fi)
+packagebasename := paper-$(project-commit-hash)
packagecontents = $(texdir)/$(packagebasename)
.PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \
$(mtexdir)/initialize.tex
@@ -373,6 +374,31 @@ dist-zip: $(packagecontents)
+# Print Copyright statement
+# -------------------------
+#
+# Append project metadata and the copyright notice, as '#' comment lines, to
+# the plain-text file in argument 1 (already holding data-specific notes).
+# '\#' keeps Make from starting a comment; empty identifiers are skipped.
+print-copyright = \
+ echo "\# Project title: $(metadata-title)" >> $(1); \
+ echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \
+ echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \
+ if [ "x$(metadata-arxiv)" != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \
+ if [ "x$(metadata-doi-journal)" != x ]; then \
+ echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \
+ if [ "x$(metadata-doi-zenodo)" != x ]; then \
+ echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \
+ echo "\#" >> $(1); \
+ echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \
+ echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \
+ echo "\# License URL: $(metadata-copyright-url)" >> $(1);
+
+
+
+
+
+
# Project initialization results
# ------------------------------
#
@@ -381,8 +407,5 @@ dist-zip: $(packagecontents)
# calculated every time the project is run. So even though this file
# actually exists, it is also added as a `.PHONY' target above.
$(mtexdir)/initialize.tex: | $(mtexdir)
-
- # Version of the project.
- @if [ -d .git ]; then v=$$(git describe --dirty --always --long);
- else v=NO-GIT; fi
- echo "\newcommand{\projectversion}{$$v}" > $@
+ echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@
+ echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@
diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk
index 088b3b3..1573920 100644
--- a/reproduce/analysis/make/verify.mk
+++ b/reproduce/analysis/make/verify.mk
@@ -107,14 +107,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)
# Verify the figure datasets.
$(call verify-txt-no-comments-leading-space, \
- $(delete-num), ad345e873e6af577f0e4e7c8942cdf08)
- $(call verify-txt-no-comments-leading-space, \
- $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8)
+ $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69)
# Verify TeX macros (the values that go into the PDF text).
for m in $(verify-check); do
file=$(mtexdir)/$$m.tex
- if [ $$m == download ]; then s=XXXXX
+ if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3
+ elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f
+ elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251
else echo; echo "'$$m' not recognized."; exit 1
fi
$(call verify-txt-no-comments-leading-space, $$file, $$s)