From e3bdc607a7fca8ebd876e1fa6002e679ad32f2c4 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Thu, 4 Jun 2020 04:09:21 +0100 Subject: Verification activated, README added, Proper metadata in plot data All the steps following the to-be-added (in 'README-hacking.md') publication checklist prior to the final check from new clone have been added: - 'README.md' file has been set. - "Reproducible supplement" was added just above the keywords, pointing to Zenodo. - A link to the to-be-uploaded data underlying the plot was added in the caption of the tools-per-year plot. - A new meta-data configuration file was added to store basic project metadata to be used throughout the project. This will later be taken into Maneage. For example, the project title is now stored here and written into the paper's LaTeX source and output datasets automatically. - Verification was activated and plot's data and LaTeX macro files are now automatically verified. - Complete metadata was added for the data underlying the plot. - A generic function was added in 'initialize.mk' that will automatically write project info and copyright in all plain-text outputs. 
--- reproduce/analysis/make/demo-plot.mk | 35 ++++++++++++++++++++++++++++----- reproduce/analysis/make/initialize.mk | 37 ++++++++++++++++++++++++++++------- reproduce/analysis/make/verify.mk | 8 ++++---- 3 files changed, 64 insertions(+), 16 deletions(-) (limited to 'reproduce/analysis/make') diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk index c14b83d..a149040 100644 --- a/reproduce/analysis/make/demo-plot.mk +++ b/reproduce/analysis/make/demo-plot.mk @@ -18,7 +18,7 @@ # Directory to host outputs # ------------------------- -a2dir = $(texdir)/tools-per-year +a2dir = $(texdir)/to-publish $(a2dir):; mkdir $@ @@ -27,7 +27,7 @@ $(a2dir):; mkdir $@ # Table for Figure 1C of Menke+20 # ------------------------------- -a2mk20f1c = $(a2dir)/columns.txt +a2mk20f1c = $(a2dir)/tools-per-year.txt $(a2mk20f1c): $(mk20tab3) | $(a2dir) # Remove the (possibly) produced figure that is created from this @@ -35,12 +35,37 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir) # multiple files with a fixed prefix. rm -f $(tikzdir)/figure-tools-per-year* + # Write the column metadata in a temporary file name (appending + # '.tmp' to the actual target name). Once all steps are done, it is + # renamed to the final target. We do this because if there is an + # error in the middle, Make will not consider the job to be + # complete and will stop here. + echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp + echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." >> $@.tmp + >> $@.tmp + echo "# Raw data taken from Menke+2020 (https://doi.org/10.1101/2020.01.15.908111)." \ + >> $@.tmp + echo "# " >> $@.tmp + echo "# Column 1: YEAR [count, u16] Publication year of papers." \ + >> $@.tmp + echo "# Column 2: WITH_TOOLS [frac, f32] Fraction of papers mentioning software tools." \ + >> $@.tmp + echo "# Column 3: NUM_PAPERS [count, u32] Total number of papers studied in that year." 
\ + >> $@.tmp + echo "# " >> $@.tmp + $(call print-copyright, $@.tmp) + + # Find the maximum number of papers. awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \ END{ for(year in all) \ - print year, 100*id[year]/all[year], all[year] \ + printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \ + all[year]) \ }' $< \ - > $@ + >> $@.tmp + + # Write it into the final target + mv $@.tmp $@ @@ -50,7 +75,7 @@ $(a2mk20f1c): $(mk20tab3) | $(a2dir) $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf # Find the first year (first column of first row) of data. - v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c)) + v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c)) echo "\newcommand{\menkefirstyear}{$$v}" > $@ # Find the number of rows in the plotted table. diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index fe9c103..b0701f4 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -213,8 +213,9 @@ $(lockdir): | $(BDIR); mkdir $@ # we want to ensure that the file is always built in every run: it contains # the project version which may change between two separate runs, even when # no file actually differs. -packagebasename := $(shell if [ -d .git ]; then \ - echo paper-$$(git describe --dirty --always --long); else echo NOGIT; fi) +project-commit-hash := $(shell if [ -d .git ]; then \ + echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +packagebasename := paper-$(project-commit-hash) packagecontents = $(texdir)/$(packagebasename) .PHONY: all clean dist dist-zip distclean clean-mmap $(packagecontents) \ $(mtexdir)/initialize.tex @@ -373,6 +374,31 @@ dist-zip: $(packagecontents) +# Print Copyright statement +# ------------------------- +# +# This statement can be used in published datasets that are in plain-text +# format. It assumes you have already put the data-specific statements in +# its first argument, it will supplement them with general project links. 
+print-copyright = \ + echo "\# Project title: $(metadata-title)" >> $(1); \ + echo "\# Git commit (that produced this dataset): $(packagebasename)" >> $(1); \ + echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ + if [ x$(metadata-arxiv) != x ]; then echo "\# arXiv:$(metadata-arxiv)" >> $(1); fi; \ + if [ x$(metadata-doi-journal) != x ]; then \ + echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ + if [ x$(metadata-doi-zenodo) != x ]; then \ + echo "\# DOI (Zenodo): $(metadata-doi-zenodo)" >> $(1); fi; \ + echo "\#" >> $(1); \ + echo "\# Copyright (C) $$(date +%Y) $(metadata-copyright-owner)" >> $(1); \ + echo "\# Dataset is available under $(metadata-copyright)." >> $(1); \ + echo "\# License URL: $(metadata-copyright-url)" >> $(1); + + + + + + # Project initialization results # ------------------------------ # @@ -381,8 +407,5 @@ dist-zip: $(packagecontents) # calculated everytime the project is run. So even though this file # actually exists, it is also aded as a `.PHONY' target above. $(mtexdir)/initialize.tex: | $(mtexdir) - - # Version of the project. - @if [ -d .git ]; then v=$$(git describe --dirty --always --long); - else v=NO-GIT; fi - echo "\newcommand{\projectversion}{$$v}" > $@ + echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ + echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 088b3b3..1573920 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -107,14 +107,14 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify the figure datasets. 
$(call verify-txt-no-comments-leading-space, \ - $(delete-num), ad345e873e6af577f0e4e7c8942cdf08) - $(call verify-txt-no-comments-leading-space, \ - $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8) + $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=XXXXX + if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3 + elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f + elif [ $$m == demo-plot ]; then s=2504472bd2b3f60b5a26c5f2a3a67251 else echo; echo "'$$m' not recognized."; exit 1 fi $(call verify-txt-no-comments-leading-space, $$file, $$s) -- cgit v1.2.1