From 6322d4f3961bc4b275707366d9bb2703ab98be79 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sat, 4 Jul 2020 21:48:10 +0100 Subject: Better names and comments in INPUTS.conf Until now, the dataset's configuration names had a 'WFPC2' prefix. But this is very alien to anyone that is not familiar with the history of the Hubble Space Telescope (the camera is no longer used! It's just used here since it's one of the standard FITS files from the FITS standard webpage). With this commit the variable names have been modified to be more readable and clear (having a 'DEMO-' prefix). Also the comments of 'INPUTS.conf' (describing the purpose of each variable) were edited and made more clear. --- reproduce/analysis/config/INPUTS.conf | 43 +++++++++++++++++++---------------- reproduce/analysis/make/delete-me.mk | 12 +++++----- reproduce/analysis/make/download.mk | 5 ++-- 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 5e6c425..60abd49 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -5,30 +5,34 @@ # # Necessary variables for each input dataset are listed below. Its good # that all the variables of each file have the same base-name (in the -# example below 'WFPC2') with descriptive suffixes, also put a short -# comment above each group of variables for each dataset, shortly -# explaining what it is. +# example below 'DEMO') with descriptive suffixes, also put a short comment +# above each group of variables for each dataset, shortly explaining what +# it is. # -# 1) Local file name ('WFPC2IMAGE' below): this is the name of the dataset +# 1) Local file name ('DEMO-DATA' below): this is the name of the dataset # on the local system (in 'INDIR', given at configuration time). 
It is # recommended that it be the same name as the online version of the -# file like the case here (note how this variable is used in 'WFPC2URL' +# file like the case here (note how this variable is used in 'DEMO-URL' # for the dataset's full URL). However, this is not always possible, so # the local and server filenames may be different. Ultimately, the file # name is irrelevant, we check the integrity with the checksum. # -# 2) The MD5 checksum of the file ('WFPC2MD5' below): this is very +# 2) The MD5 checksum of the file ('DEMO-MD5' below): this is very # important for an automatic verification of the file. You can -# calculate it by running 'md5sum' on your desired file. +# calculate it by running 'md5sum' on your desired file. You can also +# use any other checksum tool that you prefer, just be sure to correct +# the respective command in 'reproduce/analysis/make/download.mk'. # -# 3) The human-readable size of the file ('WFPC2SIZE' below): this is an -# optional feature which you can use for in the script that is loaded -# at configure time ('reproduce/software/shell/configure.sh'). When -# asking for the input-data directory, you can print some basic -# information of the files for users to get a better feeling of the -# volume. See that script for an example using this demo dataset. +# 3) The human-readable size of the file ('DEMO-SIZE' below): this is an +# optional variable, mainly to help a reader of your project get a +# sense of the volume they need to download if they don't already have +# the dataset. So it is highly recommended to add it (future readers of +# your project's source will appreciate it!). You can get it from the +# output of 'ls -lh' command on the file. Optionally you can use it in +# messages during the configuration phase (when Maneage asks for the +# input data directory), along with other info about the file(s). 
# -# 4) The full dataset URL ('WFPC2URL' below): this is the full URL +# 4) The full dataset URL ('DEMO-URL' below): this is the full URL # (including the file-name) that can be used to download the dataset # when necessary. Also, see the description above on local filename. # @@ -43,9 +47,8 @@ -# Demonstration image used in the histogram plot (remove this when -# customizing). -WFPC2IMAGE = WFPC2ASSNu5780205bx.fits -WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 -WFPC2SIZE = 62kb -WFPC2URL = https://fits.gsfc.nasa.gov/samples/$(WFPC2IMAGE) +# Demo dataset used in the histogram plot (remove when customizing). +DEMO-DATA = WFPC2ASSNu5780205bx.fits +DEMO-MD5 = a4791e42cd1045892f9c41f11b50bad8 +DEMO-SIZE = 62K +DEMO-URL = https://fits.gsfc.nasa.gov/samples/$(DEMO-DATA) diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk index f45f9ea..bc94bf1 100644 --- a/reproduce/analysis/make/delete-me.mk +++ b/reproduce/analysis/make/delete-me.mk @@ -61,8 +61,8 @@ $(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir) -# WFPC2 image PDF -# ----------------- +# Demo image PDF +# -------------- # # For an example image, we'll make a PDF copy of the WFPC II image to # display in the paper. @@ -82,8 +82,8 @@ $(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir) -# Histogram of WFPC2 image -# ------------------------ +# Histogram of demo image +# ----------------------- # # For an example plot, we'll show the pixel value histogram also. IMPORTANT # NOTE: because this histogram contains data that is included in a plot, we @@ -103,7 +103,7 @@ $(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \ # metadata from '$@.data', and add copyright. echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." 
\ > $@.tmp - echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp + echo "# Example image URL: $(DEMO-URL)" >> $@.tmp echo "# " >> $@.tmp awk '/^# Column .:/' $@.data >> $@.tmp echo "# " >> $@.tmp @@ -162,7 +162,7 @@ $(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \ v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); echo "\newcommand{\deletememax}{$$v}" >> $@ - # Write the statistics of the WFPC2 image as a macro. + # Write the statistics of the demo image as a macro. mean=$$(awk '{printf("%.2f", $$1)}' $(dm-img-stats)) echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@ median=$$(awk '{printf("%.2f", $$2)}' $(dm-img-stats)) diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index bc8b8ce..fb3f523 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -58,7 +58,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # Set the necessary parameters for this input file. if [ $* = wfpc2 ]; then - localname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + localname=$(DEMO-DATA); url=$(DEMO-URL); mdf=$(DEMO-MD5); else echo; echo; echo "Not recognized input dataset: '$*.fits'." echo; echo; exit 1 @@ -84,6 +84,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) sum=$$(md5sum $$unchecked | awk '{print $$1}') if [ $$sum = $$mdf ]; then mv $$unchecked $@ + echo "Integrity confirmed, using $@ in this project." else echo; echo; echo "Wrong MD5 checksum for input file '$$localname':" @@ -102,4 +103,4 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # It is very important to mention the address where the data were # downloaded in the final report. 
$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir) - echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@ + echo "\\newcommand{\\wfpctwourl}{$(DEMO-URL)}" > $@ -- cgit v1.2.1 From cedea21b101bc1a3af90f0c97b5bb768311630fd Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sat, 4 Jul 2020 22:15:45 +0100 Subject: Commit hash of Maneage branch used to build project as LaTeX macro To help in the documentation, the Git hash of the Maneage branch commit that the project has most recently merged with (or branched from) is now also provided as a LaTeX macro ('\maneageversion'). It is calculated in 'reproduce/analysis/make/initialize.mk' (in the recipe to 'initialize.tex'). --- reproduce/analysis/make/initialize.mk | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 315be1a..dff5eca 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -465,3 +465,9 @@ $(mtexdir)/initialize.tex: | $(mtexdir) # Version and title of project. echo "\newcommand{\projecttitle}{$(metadata-title)}" > $@ echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ + + # Calculate the latest Maneage commit used to build this + # project. Note that the '--dirty' option isn't applicable to + # "commit-ishes" (direct quote from Git's error message!). + v=$$(git describe --always --long maneage) + echo "\newcommand{\maneageversion}{$$v}" >> $@ -- cgit v1.2.1 From 5d97210eef4ba7804501c28b0ddeb9ffe1e23064 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sun, 5 Jul 2020 16:18:45 +0100 Subject: Removing possibly existing paper.bbl before remaking it Until now, when the bibliography file ('paper.bbl') had a LaTeX-related error (for example the journal name was a LaTeX macro that isn't defined), the first 'pdflatex' command that is run before 'biber' would crash, not allowing the project to reach 'biber'. 
So the user would have to manually remove 'paper.bbl' before running './project make'. With this commit, we remove any possibly existing 'paper.bbl' file before rebuilding it. Generally, this also helps in keeping things clean during the generation of the new bibliography. This bug was found by Mahdieh Nabavi. --- reproduce/analysis/make/paper.mk | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index 5227e55..e207337 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -101,6 +101,12 @@ $(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ export TEXINPUTS=$$p: cd $(texbdir); + # Delete any possibly existing target (a '.bbl' file) to avoid + # complications with LaTeX being run before the command that + # generates it. Otherwise users will have to manually delete + # it. It will be built anyway once this rule is done. + rm -f $@ + # The pdflatex option '-shell-escape' is "normally disallowed for # security reasons" according to the `info pdflatex' manual, but # is enabled here in order to allow the use of PGFPlots. If you -- cgit v1.2.1 From e1f10ac4516f64019204cadfb05dc9fe4b617d35 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Tue, 7 Jul 2020 13:58:04 +0100 Subject: Project distribution tarball can account for no PDFs in tex/tikz Until now the './project make dist' command implicitly assumed that the 'tex/tikz' directory always contains PDF files (because of the 'cp tex/tikz/*.pdf $$dir/tex/tikz' line). This was annoying for projects that don't use TiKZ or PGFPlots to generate their plots, and they had to manually comment this line. With this commit a check has been placed to see if any PDF files exist in there at all. If there aren't PDF files, the 'cp' command above is ignored. 
--- reproduce/analysis/make/initialize.mk | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index dff5eca..29cd2dc 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -304,10 +304,15 @@ $(project-package-contents): paper.pdf | $(texdir) # Copy all the necessary `reproduce' and `tex' contents. shopt -s extglob cp -r tex/src $$dir/tex/src - cp tex/tikz/*.pdf $$dir/tex/tikz cp -r reproduce/* $$dir/reproduce cp -r tex/build/!($(project-package-name)) $$dir/tex/build + # If the project has any PDFs in its 'tex/tikz' directory (TiKZ or + # PGFPlots was used to generate them), copy them too. + if ls tex/tikz/*.pdf &> /dev/null; then + cp tex/tikz/*.pdf $$dir/tex/tikz + fi + # Clean up un-necessary/local files: 1) the $(texdir)/build* # directories (when building in a group structure, there will be # `build-user1', `build-user2' and etc), are just temporary LaTeX -- cgit v1.2.1 From 2ed8a2d60bc991ad06411b2aab43989a64a59a2d Mon Sep 17 00:00:00 2001 From: Boud Roukema Date: Mon, 20 Jul 2020 18:31:43 +0200 Subject: make dist: only archive files that are under version control Until this commit, the '$(project-package-contents)' rules in 'reproduce/analysis/make/initialize.mk' included a line to provide all contents, recursively, of the directory 'reproduce/' in the package for further distribution. This could potentially lead to the distribution of private working files that are used during development and not intended for general distribution. With this commit, only those files in 'reproduce/' and 'tex/src' that are under version control are copied to the temporary directory (that is later used for creating an archive). With this change, the archiving commands actually became more clean (we don't have to manually remove 'LOCAL.conf' or other temporary files). 
Extensive comments have also been added above each step to clarify each step's purpose and method. --- reproduce/analysis/make/initialize.mk | 48 ++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 29cd2dc..211c1c0 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -298,32 +298,46 @@ $(project-package-contents): paper.pdf | $(texdir) sed -e's|\\newcommand{\\makepdf}{}|%\\newcommand{\\makepdf}{}|' \ paper.tex > $$dir/paper.tex - # Build the top-level directories. - mkdir $$dir/reproduce $$dir/tex $$dir/tex/tikz $$dir/tex/build - - # Copy all the necessary `reproduce' and `tex' contents. + # Copy ONLY the version-controlled files in 'reproduce' and + # 'tex/src'. This is important because files like 'LOCAL.conf' (in + # 'reproduce/software/config') should not be archived, they contain + # information about the host computer and are irrelevant for + # others. Also some project authors may have temporary files here + # that are not under version control and thus shouldn't be archived + # (although this is bad practice, but that is up to the user). + # + # To keep the sub-directory structure, we are packaging the files + # with Tar, piping it, and unpacking it in the archive + # directory. So afterwards we need to come back to the current + # directory. + tar -c -f - $$(git ls-files reproduce tex/src) \ + | (cd $$dir ; tar -x -f -) + cd $(curdir) + + # Build the other two subdirectories of 'tex/' that we need in the + # archive (in the actual project, these are symbolic links to the + # build directory). + mkdir $$dir/tex/tikz $$dir/tex/build + + # Copy the 'tex/build' directory into the archive (excluding the + # temporary archive directory that we are now copying to). We will + # be using Bash's extended globbing ('extglob') for excluding this + # directory. 
shopt -s extglob - cp -r tex/src $$dir/tex/src - cp -r reproduce/* $$dir/reproduce cp -r tex/build/!($(project-package-name)) $$dir/tex/build + # Clean up the $(texdir)/build* directories in the archive (when + # building in a group structure, there will be `build-user1', + # `build-user2' and etc). These are just temporary LaTeX build + # files and don't have any relevant/hand-written files in them. + rm -rf $$dir/tex/build/build* + # If the project has any PDFs in its 'tex/tikz' directory (TiKZ or # PGFPlots was used to generate them), copy them too. if ls tex/tikz/*.pdf &> /dev/null; then cp tex/tikz/*.pdf $$dir/tex/tikz fi - # Clean up un-necessary/local files: 1) the $(texdir)/build* - # directories (when building in a group structure, there will be - # `build-user1', `build-user2' and etc), are just temporary LaTeX - # build files and don't have any relevant/hand-written files in - # them. 2) The `LOCAL.conf' and `gnuastro-local.conf' files just - # have this machine's local settings and are irrelevant for anyone - # else. - rm -rf $$dir/tex/build/build* - rm $$dir/reproduce/software/config/LOCAL.conf - rm $$dir/reproduce/analysis/config/gnuastro/gnuastro-local.conf - # When submitting to places like arXiv, they will just run LaTeX # once and won't run `biber'. So we need to also keep the `.bbl' # file into the distributing tarball. However, BibLaTeX is -- cgit v1.2.1 From 2bfe5e16a0ba8198d9a77d6e36c17ac5daed7705 Mon Sep 17 00:00:00 2001 From: Boud Roukema Date: Mon, 20 Jul 2020 18:15:12 +0200 Subject: README-hacking.md: clarify Zenodo usage in publication checklist This commit clarifies the initial usage of Zenodo for reserving a Zenodo identifier and starting an 'unpublished' upload. Some other minor wording changes are done here. 
--- reproduce/analysis/make/verify.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 69711d5..b3d62f2 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -22,7 +22,7 @@ # Verification functions # ---------------------- # -# These functions are used by the final rule in this Makefil +# These functions are used by the final rule in this Makefile verify-print-error-start = \ echo; \ echo "VERIFICATION ERROR"; \ @@ -87,7 +87,7 @@ verify-txt-no-comments-no-space = \ # ------------------------------------------- # # This is the FINAL analysis step (before going onto the paper. Please use -# this step to veryify the contents of the figures/tables used in the paper +# this step to verify the contents of the figures/tables used in the paper # and the LaTeX macros generated from all your processing. It should depend # on all the LaTeX macro files that are generated (their contents will be # checked), and any files that go into the tables/figures of the paper -- cgit v1.2.1 From 2fadf4ba6f411c0b74d5d443fb01d6380dc34f10 Mon Sep 17 00:00:00 2001 From: Boud Roukema Date: Tue, 21 Jul 2020 18:01:22 +0200 Subject: Printing location when downloaded input data checksum is different There are many different directory trees involved in the Maneage system: the top directory, the 'reproduce/' directory and its sub-directories, '.build/' (that points to a user-defined build area), and a possibly user-defined input directory. Until now, in the case of a download checksum failure, it was not immediately obvious [1] to the user *where* the file with a failed checksum is. To clarify to the user *where* the suspicious file is now located, this commit adds a line to 'reproduce/analysis/make/download.mk' to print out this full path location: '$$unchecked' along with the expected and calculated checksums. 
[1] Euphemism for me spending lots of time debugging and being confused. --- reproduce/analysis/make/download.mk | 1 + 1 file changed, 1 insertion(+) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index fb3f523..0eb28ff 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -88,6 +88,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) else echo; echo; echo "Wrong MD5 checksum for input file '$$localname':" + echo " File location: $$unchecked"; \ echo " Expected MD5 checksum: $$mdf"; \ echo " Calculated MD5 checksum: $$sum"; \ echo; exit 1 -- cgit v1.2.1 From 32f3ba14f6c6efcef7edea0a365638527721f509 Mon Sep 17 00:00:00 2001 From: Boud Roukema Date: Sun, 2 Aug 2020 00:48:06 +0100 Subject: initialize.mk: accounting for no maneage branch One of the LaTeX macros reported by 'initialize.mk' is the git commit hash of the most recent 'maneage' branch that the project has been branched from. However, not all projects will retain the maneage reference. This can happen for example when people don't push the 'maneage' reference to their repository and then clone from their own repository to a second computer. Therefore, until now, in such situations, Maneage would break with an error. With this commit, in such scenarios, a place holder string is used instead, clearly highlighting that there is no 'maneage' reference. --- reproduce/analysis/make/initialize.mk | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'reproduce/analysis') diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 211c1c0..fca75f5 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -486,7 +486,12 @@ $(mtexdir)/initialize.tex: | $(mtexdir) echo "\newcommand{\projectversion}{$(project-commit-hash)}" >> $@ # Calculate the latest Maneage commit used to build this - # project. 
Note that the '--dirty' option isn't applicable to - # "commit-ishes" (direct quote from Git's error message!). - v=$$(git describe --always --long maneage) + # project: + # - The '--dirty' option (used in 'project-commit-hash') isn't + # applicable to "commit-ishes" (direct quote from Git's error + # message!). + # - The project may not have the 'maneage' branch (for example + # after cloning from a fork that didn't include it!). In this + # case, we'll just return the string a clear string. + v=$$(git describe --always --long maneage) || v=maneage-ref-missing echo "\newcommand{\maneageversion}{$$v}" >> $@ -- cgit v1.2.1