From 7bdbd6e61f132ac8f6851637d40fd06f8d6b7182 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Tue, 9 Jun 2020 03:58:19 +0100 Subject: Minor edit printing arXiv URL in plain text metadata Until now, in the 'print-copyright' function of 'initialize.mk' (that prints a fixed set of common meta necessary in plain-text files), we were simply printing this line: # Pre-print server: arXiv:1234.56789 But given that all the other elements are click-able URLs, it now prints: # Pre-print server: https://arxiv.org/abs/1234.56789 --- reproduce/analysis/make/initialize.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'reproduce') diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 19447a6..acc527d 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -442,7 +442,7 @@ print-copyright = \ echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \ echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \ if [ x$(metadata-arxiv) != x ]; then \ - echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \ + echo "\# Pre-print server: https://arxiv.org/abs/$(metadata-arxiv)" >> $(1); fi; \ if [ x$(metadata-doi-journal) != x ]; then \ echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \ if [ x$(metadata-doi-zenodo) != x ]; then \ -- cgit v1.2.1 From 2bd2e2f1833300d5102339e2aa417a3099c13960 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Tue, 9 Jun 2020 20:09:09 +0100 Subject: IMPORTANT: bug fix in default data download script of download.mk Summary of possible semantic conflicts 1. The recipe to download input datasets has been modified. You have to re-set the old 'origname' variable to 'localname' (to avoid confusion) and the default dataset URL should now be complete (including the actual filename). See the newly added descriptions in 'INPUTS.conf' for more on this. Until now, when the dataset was already present on the host system, a link couldn't be made to it, causing the project to crash in the checksum phase. This has been fixed with properly naming the main variable as 'localname' to avoid the confusion that caused it. Some other problems have been fixed in this recipe in the meantime: - When the checksum is different, the expected and calculated checksums are printed. - In the default paper, we now print the full URL of the dataset, not just the server, so the checksum of the 'download.tex' step has been updated. --- reproduce/analysis/config/INPUTS.conf | 42 ++++++++++++++++++++++++++++++++--- reproduce/analysis/make/download.mk | 24 +++++++++++--------- reproduce/analysis/make/verify.mk | 2 +- 3 files changed, 54 insertions(+), 14 deletions(-) (limited to 'reproduce') diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 6ddaec7..5e6c425 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -1,6 +1,36 @@ -# Input files necessary for this project. +# Input files necessary for this project, the variables defined in this +# file are primarily used in 'reproduce/analysis/make/download.mk'. See +# there for precise usage of the variables. But comments are also provided +# here. # -# This file is read by the configure script and running Makefiles. +# Necessary variables for each input dataset are listed below. Its good +# that all the variables of each file have the same base-name (in the +# example below 'WFPC2') with descriptive suffixes, also put a short +# comment above each group of variables for each dataset, shortly +# explaining what it is. +# +# 1) Local file name ('WFPC2IMAGE' below): this is the name of the dataset +# on the local system (in 'INDIR', given at configuration time). It is +# recommended that it be the same name as the online version of the +# file like the case here (note how this variable is used in 'WFPC2URL' +# for the dataset's full URL). However, this is not always possible, so +# the local and server filenames may be different. Ultimately, the file +# name is irrelevant, we check the integrity with the checksum. +# +# 2) The MD5 checksum of the file ('WFPC2MD5' below): this is very +# important for an automatic verification of the file. You can +# calculate it by running 'md5sum' on your desired file. +# +# 3) The human-readable size of the file ('WFPC2SIZE' below): this is an +# optional feature which you can use for in the script that is loaded +# at configure time ('reproduce/software/shell/configure.sh'). When +# asking for the input-data directory, you can print some basic +# information of the files for users to get a better feeling of the +# volume. See that script for an example using this demo dataset. +# +# 4) The full dataset URL ('WFPC2URL' below): this is the full URL +# (including the file-name) that can be used to download the dataset +# when necessary. Also, see the description above on local filename. # # Copyright (C) 2018-2020 Mohammad Akhlaghi # @@ -9,7 +39,13 @@ # this notice are preserved. This file is offered as-is, without any # warranty. + + + + +# Demonstration image used in the histogram plot (remove this when +# customizing). WFPC2IMAGE = WFPC2ASSNu5780205bx.fits WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 WFPC2SIZE = 62kb -WFPC2URL = https://fits.gsfc.nasa.gov/samples +WFPC2URL = https://fits.gsfc.nasa.gov/samples/$(WFPC2IMAGE) diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index 71ee7d3..bc8b8ce 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -58,7 +58,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # Set the necessary parameters for this input file. if [ $* = wfpc2 ]; then - origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + localname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); else echo; echo; echo "Not recognized input dataset: '$*.fits'." echo; echo; exit 1 @@ -71,21 +71,25 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # here points to the final file directly (note that `readlink' is # part of GNU Coreutils). If its not a link, the `readlink' part # has no effect. - if [ -f $(INDIR)/$$origname ]; then - ln -fs $$(readlink -f $(INDIR)/$$origname) $$out + unchecked=$@.unchecked + if [ -f $(INDIR)/$$localname ]; then + ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked else touch $(lockdir)/download $(downloadwrapper) "wget --no-use-server-timestamps -O" \ - $(lockdir)/download $$url/$$origname $@ + $(lockdir)/download $$url $$unchecked fi # Check the md5 sum to see if this is the proper dataset. - sum=$$(md5sum $@ | awk '{print $$1}') - if [ $$sum != $$mdf ]; then - wrongname=$(dir $@)/wrong-$(notdir $@) - mv $@ $$wrongname - echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname" - echo; echo; exit 1 + sum=$$(md5sum $$unchecked | awk '{print $$1}') + if [ $$sum = $$mdf ]; then + mv $$unchecked $@ + else + echo; echo; + echo "Wrong MD5 checksum for input file '$$localname':" + echo " Expected MD5 checksum: $$mdf"; \ + echo " Calculated MD5 checksum: $$sum"; \ + echo; exit 1 fi diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 67b3fea..69711d5 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -135,7 +135,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=6749e17ce606d57d30cebdbc1a5d23ad + if [ $$m == download ]; then s=49e4e9f049aa9da0453a67203d798587 elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 else echo; echo "'$$m' not recognized."; exit 1 fi -- cgit v1.2.1 From 3c63f0febef7c3c8b41ffdd6b7033b1f77f6e21d Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sun, 14 Jun 2020 01:55:33 +0100 Subject: Better explanation in the start of project configuration When './project configure' is run, after the basic checks of the compiler, a small statement is printed telling the user that some configuration questions will now be asked to start building Maneage on the system. Until now this description was confusing: it lead the reader to think that the local configuration (which was recommended to read before continuing) is in another file. With this commit, the text has been edited to explictly mention that the description of the steps following this notice should be read carefully. Thus avoiding that confusion. This issue was mentioned by Michael R. Crusoe. --- reproduce/software/shell/configure.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'reproduce') diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index 789ddd5..e4885d6 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -569,9 +569,13 @@ cat < Date: Sun, 14 Jun 2020 22:09:36 +0100 Subject: Better description for input data directory, pointing to INPUTS.conf Until now, the description of the input-data directory at configure time included a description of the input data (created by reading the values of 'INPUTS.conf'). Maintaining this is easy for a single dataset, but it becomes hard for a general project which may need many input datasets. To avoid extra complexity (for maintaining this list), the description now points a user of the project to the 'INPUTS.conf' file and asks them to look inside of it for seeing the necessary data. This infact helps with the users becoming familiar with the internal structure of Maneage and will allow the authors to focus on not having to worry about updating the low-level 'configure.sh' script. --- reproduce/software/shell/configure.sh | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'reproduce') diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index e4885d6..d9509ca 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -782,10 +782,6 @@ if [ x"$input_dir" = x ]; then else indir=$input_dir fi -wfpc2name=$(awk '!/^#/ && $1=="WFPC2IMAGE" {print $3}' $adir/INPUTS.conf) -wfpc2md5=$(awk '!/^#/ && $1=="WFPC2MD5" {print $3}' $adir/INPUTS.conf) -wfpc2size=$(awk '!/^#/ && $1=="WFPC2SIZE" {print $3}' $adir/INPUTS.conf) -wfpc2url=$(awk '!/^#/ && $1=="WFPC2URL" {print $3}' $adir/INPUTS.conf) if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then cat < Date: Mon, 15 Jun 2020 01:42:10 +0100 Subject: Configure script now accounts for non-interactive shells The project configuration requires a build-directory at configuration time, two other directories can optionally be given to avoid downloading the project's necessary data and software. It is possible to give these three directories as command-line options, or by interactively giving them after running the configure script. Until now, when these directories weren't given as command-line options, and the running shell was non-interactive, the configure script would crash on the line trying to interactively read the user's given directories (the 'read' command). With this commit, all the 'read' commands for these three directories are now put within an 'if' statement. Therefore, when 'read' fails (the shell is non-interactive), instead of a quiet crash, a descriptive message is printed, telling the user that cause of the problem, and suggesting a fix. This bug was found by Michael R. Crusoe. --- reproduce/software/shell/configure.sh | 39 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'reproduce') diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index d9509ca..b71ea96 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -712,7 +712,14 @@ EOF do # Ask the user (if not already set on the command-line). if [ x"$build_dir" = x ]; then - read -p"Please enter the top build directory: " build_dir + if read -p"Please enter the top build directory: " build_dir; then + just_a_place_holder_to_avoid_not_equal_test=1; + else + echo "ERROR: shell is in non-interactive-mode and no build directory specified." + echo "The build directory (described above) is mandatory, configuration can't continue." + echo "Please use '--build-dir' to specify a build directory non-interactively." + exit 1 + fi fi # If it exists, see if we can write in it. If not, try making it. @@ -782,6 +789,7 @@ if [ x"$input_dir" = x ]; then else indir=$input_dir fi +noninteractive_sleep=2 if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then cat < Date: Mon, 15 Jun 2020 03:26:29 +0100 Subject: OpenSSL now built after Perl After trying a clean build of Maneage in a Docker image (with a minimal debian:stable-20200607-slim OS), I noticed that the building of OpenSSL is failing because it doesn't find the proper Perl functionality. To fix it, with this commit, Perl is set as a prerequisite of OpenSSL and this fixed the problem. --- reproduce/software/make/basic.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'reproduce') diff --git a/reproduce/software/make/basic.mk b/reproduce/software/make/basic.mk index b4745e2..82bc42d 100644 --- a/reproduce/software/make/basic.mk +++ b/reproduce/software/make/basic.mk @@ -805,7 +805,7 @@ $(idir)/etc:; mkdir $@ # Note: cert.pm has to be AFTER the tarball, otherwise the build script # will try to unpack cert.pm and crash (it unpacks the first dependency # under `tdir'). -$(ibidir)/openssl: $(ibidir)/tar \ +$(ibidir)/openssl: $(ibidir)/perl \ $(tdir)/openssl-$(openssl-version).tar.gz \ $(tdir)/cert.pem \ | $(idir)/etc -- cgit v1.2.1 From 6a52c4ee9d2ee8b5723fc9fac4ebc97b77357613 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Tue, 16 Jun 2020 21:45:04 +0100 Subject: XLSX I/O properly accounts for local build Until now, when adding the necessary library flags to the build of XLSX I/O, we were effectively over-writing the 'LDFLAGS' variables. So the compiler was effectively not being told where to look for the necessary libraries. With this commit, to fix the problem, we now append the new linking flags to LDFLAGS in XLSX I/O's build, not over-write it. --- reproduce/software/make/high-level.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'reproduce') diff --git a/reproduce/software/make/high-level.mk b/reproduce/software/make/high-level.mk index 7cc2d51..75fa8ac 100644 --- a/reproduce/software/make/high-level.mk +++ b/reproduce/software/make/high-level.mk @@ -1224,9 +1224,9 @@ $(ibidir)/xlsxio: $(ibidir)/cmake \ if [ x$(on_mac_os) = xyes ]; then \ export CC=clang; \ export CXX=clang++; \ - export LDFLAGS="-lbz2"; \ + export LDFLAGS="$$LDFLAGS -lbz2"; \ else \ - export LDFLAGS="-lbz2 -lbsd"; \ + export LDFLAGS="$$LDFLAGS -lbz2 -lbsd"; \ fi; \ $(call cbuild, xlsxio-$(xlsxio-version), static, \ -DMINIZIP_DIR:PATH=$(idir) \ -- cgit v1.2.1