From bc860926afc0f5486d59f3f186847445bf6f314a Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Wed, 1 Jan 2020 19:39:36 +0000 Subject: Verification of output values and data added within template Until now, the only verification that the template provided was the published PDF. Users had to manually compare the published and generated PDFs (numbers, plots, tables) and see if they obtained the same result. However, this type of manual verification is not good and is prone to frustration and missing important differences. With this commit, a new Makefile has been added in the analysis steps: `verify.mk'. It provides facilities to easily verify the results that go into the paper. For example tables that go into making the paper's plots, or the LaTeX macros that blend into the text. See the updated parts in `README-hacking.md` for a more complete explanation. This completes task #15497. --- reproduce/analysis/config/verify-outputs.mk | 3 + reproduce/analysis/make/delete-me.mk | 37 +++++---- reproduce/analysis/make/paper.mk | 11 +-- reproduce/analysis/make/top-make.mk | 1 + reproduce/analysis/make/verify.mk | 116 ++++++++++++++++++++++++++++ 5 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 reproduce/analysis/config/verify-outputs.mk create mode 100644 reproduce/analysis/make/verify.mk (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/config/verify-outputs.mk b/reproduce/analysis/config/verify-outputs.mk new file mode 100644 index 0000000..5d8eff1 --- /dev/null +++ b/reproduce/analysis/config/verify-outputs.mk @@ -0,0 +1,3 @@ +# To enable verification of output datasets set this variable to `yes'; remove its value to disable verification. + +verify-outputs = yes diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk index eb6d919..3ba4909 100644 --- a/reproduce/analysis/make/delete-me.mk +++ b/reproduce/analysis/make/delete-me.mk @@ -23,10 +23,10 @@ # ------------- # # We will use AWK to generate a table showing X and X^2 and draw its 
plot. -dmdir = $(texdir)/delete-me -dm = $(dmdir)/data.txt -$(dmdir): | $(texdir); mkdir $@ -$(dm): $(pconfdir)/delete-me-num.mk | $(dmdir) +delete-numdir = $(texdir)/delete-me-num +delete-num = $(delete-numdir)/data.txt +$(delete-numdir): | $(texdir); mkdir $@ +$(delete-num): $(pconfdir)/delete-me-num.mk | $(delete-numdir) # When the plotted values are re-made, it is necessary to also # delete the TiKZ externalized files so the plot is also re-made. @@ -44,10 +44,10 @@ $(dm): $(pconfdir)/delete-me-num.mk | $(dmdir) # # For an example image, we'll make a PDF copy of the WFPC II image to # display in the paper. -dddemodir = $(texdir)/delete-me-demo -$(dddemodir): | $(texdir); mkdir $@ -demopdf = $(dddemodir)/wfpc2.pdf -$(demopdf): $(dddemodir)/%.pdf: $(indir)/%.fits | $(dddemodir) +delete-demodir = $(texdir)/delete-me-demo +$(delete-demodir): | $(texdir); mkdir $@ +delete-pdf = $(delete-demodir)/wfpc2.pdf +$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir) # When the plotted values are re-made, it is necessary to also # delete the TiKZ externalized files so the plot is also re-made. @@ -64,8 +64,9 @@ $(demopdf): $(dddemodir)/%.pdf: $(indir)/%.fits | $(dddemodir) # ------------------------ # # For an example plot, we'll show the pixel value histogram also. -histogram = $(dddemodir)/wfpc2-hist.txt -$(histogram): $(dddemodir)/%-hist.txt: $(indir)/%.fits | $(dddemodir) +delete-histogram = $(delete-demodir)/wfpc2-hist.txt +$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \ + | $(delete-demodir) # When the plotted values are re-made, it is necessary to also # delete the TiKZ externalized files so the plot is also re-made. @@ -83,8 +84,9 @@ $(histogram): $(dddemodir)/%-hist.txt: $(indir)/%.fits | $(dddemodir) # # This is just as a demonstration on how to get analysic configuration # parameters from variables defined in `reproduce/analysis/config/'. 
-stats = $(dddemodir)/wfpc2-stats.txt -$(stats): $(dddemodir)/%-stats.txt: $(indir)/%.fits | $(dddemodir) +delete-stats = $(delete-demodir)/wfpc2-stats.txt +$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \ + | $(delete-demodir) aststatistics $< -h0 --mean --median > $@ @@ -98,7 +100,8 @@ $(stats): $(dddemodir)/%-stats.txt: $(indir)/%.fits | $(dddemodir) # # NOTE: In LaTeX you cannot use any non-alphabetic character in a variable # name. -$(mtexdir)/delete-me.tex: $(dm) $(demopdf) $(histogram) $(stats) +$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \ + $(delete-stats) # Write the number of random values used. echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@ @@ -112,15 +115,15 @@ $(mtexdir)/delete-me.tex: $(dm) $(demopdf) $(histogram) $(stats) # values, then using it again to read each separately to use in the # macro definition. mm=$$(awk 'BEGIN{min=99999; max=-min} - {if($$2>max) max=$$2; if($$2max) max=$$2; if($$2> $@ v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); echo "\newcommand{\deletememax}{$$v}" >> $@ # Write the statistics of the WFPC2 image as a macro. - mean=$$(awk '{printf("%.2f", $$1)}' $(stats)) + mean=$$(awk '{printf("%.2f", $$1)}' $(delete-stats)) echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@ - median=$$(awk '{printf("%.2f", $$2)}' $(stats)) + median=$$(awk '{printf("%.2f", $$2)}' $(delete-stats)) echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@ diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index f76f5de..a4eeb2e 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -28,17 +28,18 @@ # `$(mtexdir)/project.tex' is actually just a combination of separate files # that keep the LaTeX macros related to each workhorse Makefile (in # `reproduce/src/make/*.mk'). Those individual macros are pre-requisites to -# `$(mtexdir)/project.tex'. 
The only workhorse Makefile that doesn't need -# to produce LaTeX macros is this Makefile (`reproduce/src/make/paper.mk'). +# `$(mtexdir)/verify.tex' which will check them before starting to build +# the paper. The only workhorse Makefile that doesn't need to produce LaTeX +# macros is this Makefile (`reproduce/src/make/paper.mk'). # -# This file is thus the interface between the processing scripts and the -# final PDF: when we get to this point, all the processing has been +# This file is thus the interface between the analysis/processing steps and +# the final PDF: when we get to this point, all the processing has been # completed. # # Note that if you don't want the final PDF and just want the processing # and file outputs, you can remove the value of `pdf-build-final' in # `reproduce/analysis/config/pdf-build.mk'. -$(mtexdir)/project.tex: $(foreach s, $(subst paper,,$(makesrc)), $(mtexdir)/$(s).tex) +$(mtexdir)/project.tex: $(mtexdir)/verify.tex # If no PDF is requested, or if LaTeX isn't available, don't # continue to building the final PDF. Otherwise, merge all the TeX diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk index 27fe16a..0292d3c 100644 --- a/reproduce/analysis/make/top-make.mk +++ b/reproduce/analysis/make/top-make.mk @@ -113,6 +113,7 @@ endif makesrc = initialize \ download \ delete-me \ + verify \ paper diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk new file mode 100644 index 0000000..440ac57 --- /dev/null +++ b/reproduce/analysis/make/verify.mk @@ -0,0 +1,116 @@ +# Verify the project outputs before building the paper. +# +# Copyright (C) 2020 Mohammad Akhlaghi +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
+# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# A copy of the GNU General Public License is available at +# <http://www.gnu.org/licenses/>. + + + + + +# Verification functions +# ---------------------- +# +# These functions are used by the final rule in this Makefile. +# +# verify-print-tips: print a short guide on how to disable verification, +# or how to update the expected checksum of a file. +# +# verify-txt-no-comments-leading-space: verify a plain-text file. The +# first argument is the file and the second is its expected MD5 +# checksum. Leading white space, comments (from `#' to the end of the +# line) and empty lines are removed before calculating the checksum. On +# a mismatch, the two checksums are printed first and the tips after +# them (the tips refer to the checksum `above'), then the shell exits +# with status 1. +verify-print-tips = \ + echo "If you are still developing your project, you can disable"; \ + echo "verification by removing the value of the variable in the"; \ + echo "following file (from the top project source directory):"; \ + echo " reproduce/analysis/config/verify-outputs.mk"; \ + echo; \ + echo "If this is the final version of the file, you can just copy"; \ + echo "and paste the calculated checksum (above) for the file in"; \ + echo "the following project source file:"; \ + echo " reproduce/analysis/make/verify.mk" + +verify-txt-no-comments-leading-space = \ + infile="$(strip $(1))"; \ + inchecksum=$(strip $(2)); \ + checksum=$$(sed -e 's/^[[:space:]]*//g' \ + -e 's/\#.*$$//' \ + -e '/^$$/d' "$$infile" \ + | md5sum \ + | awk '{print $$1}'); \ + if [ x"$$inchecksum" = x"$$checksum" ]; then \ + echo "Verified: $$infile"; \ + else \ + echo; \ + echo "VERIFICATION ERROR"; \ + echo "------------------"; \ + echo "Checked file (without empty or commented lines):"; \ + echo " $$infile"; \ + echo "Expected MD5 checksum: $$inchecksum"; \ + echo "Calculated MD5 checksum: $$checksum"; \ + echo; \ + $(call verify-print-tips); \ + echo; exit 1; \ + fi; + + + + + +# Final verification TeX macro (can be empty) +# ------------------------------------------- +# +# This is the FINAL analysis step (before going onto the paper). Please use +# this step to verify the contents of the figures/tables used in the paper +# and the LaTeX macros generated from all your processing. 
It should depend +# on all the LaTeX macro files that are generated (their contents will be +# checked), and any files that go into the tables/figures of the paper +# (generated in various stages of the analysis). +# +# Since each analysis step's data files are already prerequisites of their +# respective TeX macro file, it's enough for `verify.tex' to depend on the +# final TeX macro. +# +# USEFUL TIP: during the early phases of your research (when you are +# developing your analysis, and the values aren't final), you can comment +# the respective lines. +# +# Here is a description of the variables defined here. +# +# verify-dep: The major step dependencies of `verify.tex', this includes +# all the steps that must be finished before it. +# +# verify-check: The files whose contents are important. This is +# essentially the same as `verify-dep', but it has removed +# the `initialize' step (which is information about the +# pipeline, not the results). +# +# `filter-out' is used (not `subst') because it removes whole words only: +# a step whose name merely contains `paper', `verify' or `initialize' is +# kept intact. +# +# NOTE(review): the multi-line `if'/`for' below needs the whole recipe to +# run in a single shell; presumably `.ONESHELL' is set by the top-level +# Makefile -- confirm. +verify-dep = $(filter-out verify paper,$(makesrc)) +verify-check = $(filter-out initialize,$(verify-dep)) +$(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) + + # Make sure that verification is actually requested. + if [ x"$(verify-outputs)" = xyes ]; then + + # Verify the figure datasets. + $(call verify-txt-no-comments-leading-space, \ + $(delete-num), ad345e873e6af577f0e4e7c8942cdf08) + $(call verify-txt-no-comments-leading-space, \ + $(delete-histogram), 12a81c4c8c5f552e5ed5686453587fe8) + + # Verify TeX macros (the values that go into the PDF text). + for m in $(verify-check); do + file=$(mtexdir)/$$m.tex + if [ "$$m" = delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 + elif [ "$$m" = download ]; then s=6749e17ce606d57d30cebdbc1a5d23ad + else echo; echo "'$$m' not recognized."; exit 1 + fi + $(call verify-txt-no-comments-leading-space, $$file, $$s) + done + fi + + # Make an empty final target. + touch $@ -- cgit v1.2.1