aboutsummaryrefslogtreecommitdiff
path: root/reproduce/software
diff options
context:
space:
mode:
author Mohammad Akhlaghi <mohammad@akhlaghi.org> 2025-12-30 12:57:42 +0100
committer Mohammad Akhlaghi <mohammad@akhlaghi.org> 2025-12-30 13:20:32 +0100
commit 57de4c9a63fcc7683108762a6b58be6ea1ecad35 (patch)
tree cb5d09391c534761b5c39e770b87fe1b89dfc0b4 /reproduce/software
parent 7719b8c404e5ccba010c1e5cce0a5dfd905a29ef (diff)
Configuration: Gnuastro not writing commit, versions, date or options [HEAD, maneage]
Summary: your project will only be affected if it uses Gnuastro and depended on the FITS keywords that kept the information above (in the 0th HDU). Until now, there was no Maneage-specific configuration for Gnuastro, so it would write all the metadata that it writes in its default/manual operation. However, in a large pipeline that involves the temporary creation of thousands of FITS files (which are usually deleted shortly after), such metadata are just overhead. It is the responsibility of the pipeline designer to keep this metadata in the final products of the pipeline, not in all the intermediate files. With this commit, the default installation of Gnuastro in Maneage disables all such metadata in its output FITS files and provides tips for users on the final metadata to include in their pipeline's outputs.
Diffstat (limited to 'reproduce/software')
-rw-r--r-- reproduce/software/make/high-level.mk 38
1 files changed, 37 insertions, 1 deletions
diff --git a/reproduce/software/make/high-level.mk b/reproduce/software/make/high-level.mk
index 8ee505c..cd628ec 100644
--- a/reproduce/software/make/high-level.mk
+++ b/reproduce/software/make/high-level.mk
@@ -1236,22 +1236,58 @@ $(ibidir)/ghostscript-$(ghostscript-version): \
# Gnuastro can optionally depend on libgit2, but it is not included as a
# dependency here for the reasons below. If you would like to have it,
# add it as a dependency (its build instruction and dependencies are here
-# already) and remove the '--without-libgit2' option in the recipe).
+# already), then remove the '--without-libgit2' configure option and also
+# comment out the following two generic configuration lines:
+# 'outfitsnocommit' and 'outfitsnoconfig'.
# - Within Maneage, we have everything under Git already and users are
# expected to include the version in all their products.
# - libgit2 can only be built with CMake (which takes extremely long to
# compile: possibly even longer than GCC!).
+# - Maneage is often run on HPCs where the storage and processors are
+# connected over a network, so the libgit2 operations that are necessary
+# on every created file can have a performance impact.
$(ibidir)/gnuastro-$(gnuastro-version): \
$(ibidir)/gsl-$(gsl-version) \
$(ibidir)/wcslib-$(wcslib-version) \
$(ibidir)/libjpeg-$(libjpeg-version) \
$(ibidir)/libtiff-$(libtiff-version) \
$(ibidir)/ghostscript-$(ghostscript-version)
+
+# Generic installation.
tarball=gnuastro-$(gnuastro-version).tar.lz
$(call import-source, $(gnuastro-url), $(gnuastro-checksum))
$(call gbuild, gnuastro-$(gnuastro-version), static, \
--without-libgit2, -j$(numthreads))
cp $(dtexdir)/gnuastro.tex $(ictdir)/
+
+# Generally, besides the Git commit, we are also disabling the
+# default mode of printing any type of metadata and versions of
+# dependencies in output headers (through the installation-wide
+# configuration file). This is done because within a large pipeline,
+# Gnuastro is used to create many intermediate files (that are
+# deleted shortly after being created) and it is not worth the
+# overhead to keep this information in those intermediate products:
+# it is the pipeline's responsibility to put it in the final
+# outputs. We recommend using the Gnuastro Fits program's keyword
+# writing options to add as much contextual metadata to your final
+# products as possible. Some tips:
+# - The creation date is not good (because it is not reproducible
+# and will make simple validation hard). The project's Commit
+# should be used instead.
+# - Define a keyword to keep the public URL of the repository of
+# the Maneage'd project. In this way, a person who gets your
+# final product can easily check that for all the information
+# (including software versions and configuration options).
+ gconf=$(idir)/etc/gnuastro/gnuastro.conf
+ echo "" >> $$gconf
+ echo "# Maneage specific (see Gnuastro build rule for details)." \
+ >> $$gconf
+ echo " outfitsnocommit = 1" >> $$gconf
+ echo " outfitsnoconfig = 1" >> $$gconf
+ echo " outfitsnodate = 1" >> $$gconf
+ echo " outfitsnoversions = 1" >> $$gconf
+
+# Final target.
echo "GNU Astronomy Utilities $(gnuastro-version) \citep{gnuastro}" > $@
$(ibidir)/icu-$(icu-version): $(ibidir)/python-$(python-version)