diff options
Diffstat (limited to 'reproduce/analysis')
-rw-r--r-- | reproduce/analysis/config/metadata-common.conf | 16 | ||||
-rw-r--r-- | reproduce/analysis/config/verify-outputs.conf | 11 | ||||
-rw-r--r-- | reproduce/analysis/make/demo-plot.mk | 35 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 37 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 8 |
5 files changed, 89 insertions, 18 deletions
diff --git a/reproduce/analysis/config/metadata-common.conf b/reproduce/analysis/config/metadata-common.conf new file mode 100644 index 0000000..7bc9fa5 --- /dev/null +++ b/reproduce/analysis/config/metadata-common.conf @@ -0,0 +1,16 @@ +# Metadata parameters that can be used in the project's published outputs. + +# Project information +metadata-title = Towards Long-term and Archivable Reproducibility + +# DOIs and identifiers. +metadata-arxiv = +metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248 +metadata-doi-journal = +metadata-doi = $(metadata-doi-zenodo) +metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper + +# Copyright and identifier. +metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org> +metadata-copyright = Creative Commons Attribution-ShareAlike (CC BY-SA) +metadata-copyright-url = https://creativecommons.org/licenses/by-sa/4.0 diff --git a/reproduce/analysis/config/verify-outputs.conf b/reproduce/analysis/config/verify-outputs.conf index e4ef479..c9287e8 100644 --- a/reproduce/analysis/config/verify-outputs.conf +++ b/reproduce/analysis/config/verify-outputs.conf @@ -1,2 +1,9 @@ -# To enable verification of output datasets set this variable to yes -verify-outputs = +# To enable verification of output datasets set this variable to 'yes'. +# +# Copyright (C) 2019-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice and +# this notice are preserved. This file is offered as-is, without any +# warranty. 
+verify-outputs = yes diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk index c14b83d..a149040 100644 --- a/reproduce/analysis/make/demo-plot.mk +++ b/reproduce/analysis/make/demo-plot.mk @@ -18,7 +18,7 @@ # Directory to host outputs # ------------------------- -a2dir = $(texdir)/tools-per-year +a2dir = $(texdir)/to-publish $(a2dir):; mkdir $@ @@ -27,7 +27,7 @@ $(a2dir):; mkdir $@ # Table for Figure 1C of Menke+20 # ------------------------------- -a2mk20f1c = $(a2dir)/columns.txt +a2mk20f1c = $(a2dir)/tools-per-year.txt $(a2mk20f1c): $(mk20tab3) | $(a2dir) # Remove the (possibly) produced figure that is created from this @@ -35,12 +35,37 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir) # multiple files with a fixed prefix. rm -f $(tikzdir)/figure-tools-per-year* + # Write the column metadata in a temporary file name (appending + # '.tmp' to the actual target name). Once all steps are done, it is + # renamed to the final target. We do this because if there is an + # error in the middle, Make will not consider the job to be + # complete and will stop here. + echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp + echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." >> $@.tmp + >> $@.tmp + echo "# Raw data taken from Menke+2020 (https://doi.org/10.1101/2020.01.15.908111)." \ + >> $@.tmp + echo "# " >> $@.tmp + echo "# Column 1: YEAR [count, u16] Publication year of papers." \ + >> $@.tmp + echo "# Column 2: WITH_TOOLS [frac, f32] Fraction of papers mentioning software tools." \ + >> $@.tmp + echo "# Column 3: NUM_PAPERS [count, u32] Total number of papers studied in that year." \ + >> $@.tmp + echo "# " >> $@.tmp + $(call print-copyright, $@.tmp) + + # Sum the counts of each year, then print the year, percentage and total. 
awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \ END{ for(year in all) \ - print year, 100*id[year]/all[year], all[year] \ + printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \ + all[year]) \ }' $< \ - > $@ + >> $@.tmp + + # Write it into the final target + mv $@.tmp $@ @@ -50,7 +75,7 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir) $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf # Find the first year (first column of first row) of data. - v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c)) + v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c)) echo "\newcommand{\menkefirstyear}{$$v}" > $@ # Find the number of rows in the plotted table. diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index fe9c103..b0701f4 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -213,8 +213,9 @@ $(lockdir): | $(BDIR); mkdir $@ # we want to ensure that the file is always built in every run: it contains # the project version which may change between two separate runs, even when # no file actually differs. -packagebasename := $(shell if [ -d .git ]; then \ - echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi) +project-commit-hash := $(shell if [ -d .git ]; then \ + echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +packagebasename := paper-$(project-commit-hash) packagecontents = $(texdir)/$(packagebasename) .PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \ $(mtexdir)/initialize.tex @@ -373,6 +374,31 @@ dist-zip: $(packagecontents) +# Print Copyright statement +# ------------------------- +# +# This statement can be used in published datasets that are in plain-text +# format. It assumes you have already put the data-specific statements in +# its first argument (a file name) and appends the general project metadata. 
+print-copyright = \ + echo "\# Project title: $(metadata-title)" >> $(1); \ + echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \ + echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ + if [ "x$(metadata-arxiv)" != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \ + if [ "x$(metadata-doi-journal)" != x ]; then \ + echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ + if [ "x$(metadata-doi-zenodo)" != x ]; then \ + echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \ + echo "\#" >> $(1); \ + echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \ + echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \ + echo "\# License URL: $(metadata-copyright-url)" >> $(1); + + + + + + # Project initialization results # ------------------------------ # @@ -381,8 +407,5 @@ dist-zip: $(packagecontents) # calculated everytime the project is run. So even though this file # actually exists, it is also aded as a `.PHONY' target above. $(mtexdir)/initialize.tex: | $(mtexdir) - - # Version of the project. - @if [ -d .git ]; then v=$$(git describe --dirty --always --long); - else v=NO-GIT; fi - echo "\newcommand{\projectversion}{$$v}" > $@ + echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ + echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 088b3b3..1573920 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -107,14 +107,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify the figure datasets. 
$(call verify-txt-no-comments-leading-space, \ - $(delete-num), ad345e873e6af577f0e4e7c8942cdf08) - $(call verify-txt-no-comments-leading-space, \ - $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8) + $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=XXXXX + if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3 + elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f + elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251 else echo; echo "'$$m' not recognized."; exit 1 fi $(call verify-txt-no-comments-leading-space, $$file, $$s) |