From b7e88b1bf82b936f8fe07c0c2c5f8621c2018f3a Mon Sep 17 00:00:00 2001
From: Mohammad Akhlaghi
Date: Sun, 11 Nov 2018 19:09:21 +0000
Subject: Dependencies built at the start of the pipeline

To enable easy/proper reproduction of results, all the high-level
dependencies are now built within the pipeline and installed in a fixed
directory that is added to the PATH used by the pipeline's Makefiles. This
includes GNU Bash and GNU Make, which are then used to run the
pipeline. The `./configure' script will first build Bash and Make within
itself, then use them to build the rest of the dependencies (through
`reproduce/src/make/dependencies.mk').

All the dependencies are also built to be static. So after they are built,
changes to the system's low-level libraries (like the C library) won't
affect the built programs. Currently the C library and C compiler aren't
built within the pipeline, but we hope to add them to the build process
too.

With this change, we now have full control of the shell and Make that will
be used in the pipeline, so we can safely remove some of the generalities
we had before.
---
 .gitignore                                       |   3 +
 .gnuastro                                        |   1 -
 Makefile                                         | 123 ---------
 README                                           | 117 +--------
 README.md                                        | 318 ++++++++++++-----------
 configure                                        | 278 +++++++++++---------
 reproduce/config/pipeline/LOCAL.mk.in            |  32 ++-
 reproduce/config/pipeline/dependency-versions.mk |  15 ++
 reproduce/src/make/Top-Makefile                  | 108 ++++++++
 reproduce/src/make/delete-me.mk                  |  12 +-
 reproduce/src/make/dependencies.mk               | 254 ++++++++++++++++++
 reproduce/src/make/initialize.mk                 |  84 ++----
 reproduce/src/make/paper.mk                      |  22 +-
 13 files changed, 776 insertions(+), 591 deletions(-)
 delete mode 120000 .gnuastro
 delete mode 100644 Makefile
 create mode 100644 reproduce/config/pipeline/dependency-versions.mk
 create mode 100644 reproduce/src/make/Top-Makefile
 create mode 100644 reproduce/src/make/dependencies.mk

diff --git a/.gitignore b/.gitignore
index c5bfd19..b768efd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@
 mmap_*
 *.auxlock
+.local
+Makefile
+.gnuastro
 LOCAL_tmp.mk
 LOCAL_old.mk
 reproduce/build
diff --git a/.gnuastro b/.gnuastro
deleted file mode 120000
index 5ccb4fd..0000000
--- a/.gnuastro
+++ /dev/null
@@ -1 +0,0 @@
-reproduce/config/gnuastro
\ No newline at end of file
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 3bbafcd..0000000
--- a/Makefile
+++ /dev/null
@@ -1,123 +0,0 @@
-# A ONE-LINE DESCRIPTION OF THE WHOLE PIPELINE
-#
-# Original author:
-#     Mohammad Akhlaghi
-# Contributing author(s):
-#     Your name
-# Copyright (C) 2018, Your Name.
-#
-# This Makefile is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This Makefile is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# A copy of the GNU General Public License is available at
-# <http://www.gnu.org/licenses/>.
-
-
-
-
-# Ultimate target of this pipeline
-# --------------------------------
-#
-# The final paper (in PDF format) is the main target of this whole
-# reproduction pipeline. So as defined in the Make paradigm, we are
-# defining it here. But since we also want easy access to the build
-# directory during processing (before the PDF is build), that is placed as
-# the first prerequisite.
-# -# Note that if you don't have LaTeX to build the PDF or generally are just -# interested in the processing, you can skip create the final PDF creation -# with `pdf-build-final' of `reproduce/config/pipeline/pdf-build.mk'. -all: reproduce/build paper.pdf - - - - - -# Use Bash as the shell -# --------------------- -# -# Some systems don't default to Bash as the shell they use to execute -# the recipes or run Make's `$(shell)' function. So to be sure the -# scripts are executed in a similar manner on all systems, we'll set -# the default shell for this pipeline to be Bash. -SHELL := /bin/bash - - - - - -# Include specific Makefiles -# -------------------------- -# -# To keep things clean, managable and readable, each set of operations is -# (and must be) classified (modularized) by context into separate -# Makefiles: the more the better. They are included in this top-level -# Makefile through the command below. -# -# To further help in readability, it is best to avoid including Makefiles -# within any other Makefile. So in short, it is best that the `foreach' -# loop below contains all the `reproduce/src/make/*.mk' files. -# -# IMPORTANT NOTE: order matters in the inclusion of the processing -# Makefiles. As the pipeline grows, some Makefiles will probably define -# variables/dependencies that others need. Therefore unlike the -# `reproduce/config/pipeline/*.mk' Makefiles which only define low-level -# variables (not dependent on other variables and contain no rules), the -# high-level processing Makefiles are included through the `foreach' loop -# below by explicitly requesting them in a specific order here. -include reproduce/config/pipeline/*.mk -include $(foreach f, initialize \ - download \ - delete-me \ - paper \ - , reproduce/src/make/$(f).mk) - - - - - -# LaTeX macros for paper -# ---------------------- -# -# The final report's PDF (final target of this reproduction pipeline) takes -# variable strings from the pipeline. Those variables are defined as LaTeX -# macros in `tex/pipeline.tex'. This file is thus the interface between the -# pipeline scripts and the final PDF. -# -# Each of the pipeline steps will save their macros into their own `.tex' -# file in the `$(mtexdir)' directory. Those individual macros are the -# pre-requisite to `tex/pipeline.txt'. `tex/pipeline.tex' is thus a -# high-level output and is defined in this top-most Makefile (and not -# `reproduce/src/make/paper.mk'). This enables a clear demonstration of the -# top-level dependencies clearly. -# -# Note that if you don't want the final PDF and just want the processing -# and file outputs, you can remove the value of the `pdf-build-final' -# variable in `reproduce/config/pdf-build.mk'. -tex/pipeline.tex: $(foreach f, initialize \ - download \ - delete-me \ - , $(mtexdir)/$(f).tex) - - # If no PDF is requested, then just exit here. -ifeq ($(pdf-build-final),) - @echo - @echo - @echo "-----" - @echo "Everything is OK until this point, but not building PDF." - @echo "To do so, give a value to the 'pdf-build-final' variable." - @echo "It is defined in 'reproduce/config/pipeline/pdf-build.mk'." - @echo - @exit 1 -endif - - # Merge all the TeX macros that are prepared for building the PDF. - @cat $(mtexdir)/*.tex > $@ diff --git a/README b/README index 919005b..fbbc7f6 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ -Reproduction pipeline for XXXXX. 
+Reproduction pipeline for XXXXXXX -For a general introduction to reproducible science as done here, please see -the link below: +For a general introduction to reproducible science as implemented in this +pipeline, please see the link below: http://akhlaghi.org/reproducible-science.html @@ -10,112 +10,23 @@ the link below: Running the pipeline ==================== -To reproduce the results, please take these steps in order: +1. Necessary dependency: LaTeX. -1. Make sure you have the dependencies (below). + Other than LaTeX, this pipeline just needs some basic building tools + that are already present if you have ever build a software from source + (Tar, C compiler, and Make). The pipeline will build its own + dependencies when you run `./configure'. -2. Configure/set your top-level directories with the command below. It is +2. Configure the environment (top-level directories in particular) and + build all the necessary software for use in the next step. It is recommended to modify the directories. Please read the comments (lines starting with an `#') in the file that is opened by `./configure' and set the directories accordingly. $ ./configure -3. Run the following command to reproduce everything on 8 threads. If your - CPU has a different number of threads, change the number. +3. Run the following command to reproduce all the analysis and build the + final `paper.pdf' on 8 threads. If your CPU has a different number of + threads, change the number. - $ make -j8 - - - -Output -====== - -The output of the pipeline is a PDF file, describing the published paper. - - - -Dependencies -============ - -To reproduce the results you need the following programs. Except for -Gnuastro, the version of the other programs will not make a difference. - - Gnuastro Y.Y - ------------ - - Gnuastro is a large collection of programs for astronomical data analysis - on the command-line. This is an intermediate version of Gnuastro (the - tarball is not officially released on the Gnuastro webpage). However, - this pipeline will ONLY work with this version of Gnuastro. For - convenience, this tarball is available in the following link: - - https://zenodo.org/record/ZZZZZZ/files/gnuastro-Y.Y.tar.gz - - To uncompress, build and install the Gnuastro tarball, follow the - instructions in the link below. - - https://www.gnu.org/software/gnuastro/manual/html_node/Quick-start.html - - If you successfully downloaded the tarball from the link above, please - ignore the rest of this section on Gnuastro. If not, this version of - Gnuastro is always present in Gnuastro's version controlled history and - this reproduction pipeline contains the fix to implement to it. - - To build the above version of Gnuastro, please clone Gnuastro and - checkout this version as shown in the following commands: - - $ git clone http://git.sv.gnu.org/r/gnuastro.git - $ git checkout Y.Y - - Afterwords, you need to bootstrap Gnuastro as described in the following - link. - - https://www.gnu.org/software/gnuastro/manual/html_node/Bootstrapping.html - - You are now ready to configure, build and install Gnuastro as described - in the "Quick start" link above. - - - - AWK - --- - - AWK is a program for working with text files. GNU AWK is the most common - implementation and it is commonly already installed on most systems. - - - Bash - ---- - - Bash is the most common "shell" or command-line manager and available in - almost all systems (even if not the default). It doesn't need to be the - default for this pipeline to run. 
- - - flock - ----- - - This is a small program to manage file locks from the command-line. It is - available in all GNU/Linux distributions. For those operating systems - that don't have it, an implementation that is easy to install is - available in the link below. - - https://github.com/discoteq/flock - - - - Wget or cURL - ------------ - - These programs (`wget' or `curl' on the command-line) are used to - download the input files if you don't already have them. - - - - LaTeX - ----- - - LaTeX is used to build the final PDF of this pipeline. Some important - packages within LaTeX that this pipeline uses are: `biblatex' and - `pgfplots'. + $ .local/bin/make -j8 diff --git a/README.md b/README.md index 1ee2c12..4e4af70 100644 --- a/README.md +++ b/README.md @@ -228,54 +228,69 @@ Reproduction pipeline architecture In order to adopt this pipeline to your research, it is important to first understand its architecture so you can navigate your way in the directories and understand how to implement your research project within its -framework. In short, when the user runs `make` to start the processing, the -first file that is read is the top-level `Makefile`. Therefore, we'll start -our navigation with this file. This file is heavily commented so hopefully -the descriptions in each comment will be enough to understand the general -details. As you read this section, please also look at the contents of the -mentioned files and directories to fully understand what is being said. - -Before starting to look into the top Makefile, it is important to recall +framework. But before reading this theoretical discussion, please run the +pipeline without any change, just to see how it works. + +In order to obtain a reproducible result it is important to have an +identical environment (for example same versions the programs that it will +use). This also has the added advantage that in your separate research +projects, you can use different versions of a single software and they +won't interfere. Therefore, the pipeline builds its own dependencies during +the `./configure` step. Building of the dependencies is managed by +`reproduce/src/make/dependencies.mk`. So later, if you add a new +program/library for your processing, don't forget to include a rule on how +to build it, in this file. + +When you run `.local/bin/make` to start the processing, the first file that +is read is the top-level `Makefile`. Therefore, we'll start our +navigation/discussion with this file. This file is relatively short and +heavily commented so hopefully the descriptions in each comment will be +enough to understand the general details. As you read this section, please +also look at the contents of the mentioned files and directories to fully +understand what is being said. + +Before starting to look into the top `Makefile`, it is important to recall that Make defines dependencies by files. Therefore, the input and output of every step must be a file. Also recall that Make will use the modification -date of the prerequisite and target files to see if the a target must be -re-built or not. Therefore during the processing _many_ intermediate files +date of the prerequisite and target files to see if the target must be +re-built or not. Therefore during the processing, _many_ intermediate files will be created (see the tips section below on a good strategy to deal with -large/huge files). Therefore, in configuration time, the user can define a -top-level build directory variable (or `$(BDIR)`) to host all the -intermediate files. 
This directory doesn't need to be version controlled or -even synchronized or backed-up in other servers: its contents are all -products of the pipeline after all, and can be easily re-created any -time. As you define targets, it is thus important to place them all under -sub-directories of `$(BDIR)`. +large/huge files). + +To keep the source and (intermediate) built files separate, at +configuration time, the user _must_ define a top-level build directory +variable (or `$(BDIR)`) to host all the intermediate files. This directory +doesn't need to be version controlled or even synchronized, or backed-up in +other servers: its contents are all products of the pipeline, and can be +easily re-created any time. As you define targets for your new rules, it is +thus important to place them all under sub-directories of `$(BDIR)`. Let's start reviewing the processing with the top Makefile. Please open and -inspect it as we go along here. The first step is to define the ultimate -target (`paper.pdf`). You shouldn't modify this line. The rule to build -`paper.pdf` is in another Makefile that will be imported into this top -Makefile later. Don't forget that Make goes over all the process once (to -define dependencies and etc) and then starts its execution. So it is fine -to define the rule to build `paper.pdf` at a later stage (this is the -beauty of Make after all). - -Having defined the top target, we will import all the necessary -Makefiles. As you see in `Makefile`, first we include all -`reproduce/config/pipeline/*.mk`. The configuration of each logical step of -the pipeline is placed here as a separate file. These Makefiles must only -contain raw Make variables (pipeline configurations). By raw we mean that -the Make variables in these files must not depend on any other variables -because we don't want to assume any order in reading them. It is very -important to *not* define any rule or other Make construct in any of these -_configuration-Makefiles_ (see the next paragraph for Makefiles with -rules). This will enable you to set the respective files in this directory -as a prerequisite to any target that depends on their variable -values. Therefore, if you change any of the values, all targets that depend -on those values will be re-built. +inspect it as we go along here. The first step (un-commented line) defines +the ultimate target (`paper.pdf`). You shouldn't modify this line. The rule +to build `paper.pdf` is in another Makefile that will be imported into this +top Makefile later. Don't forget that Make first scans the Makefile(s) once +completely (to define dependencies and etc) and starts its execution after +that. So it is fine to define the rule to build `paper.pdf` at a later +stage (this is one beauty of Make!). + +Having defined the top target, we will include all the necessary +Makefiles. First we include all `reproduce/config/pipeline/*.mk`. The +configuration of each logical step of the pipeline is placed here as a +separate file. These Makefiles must only contain raw Make variables +(pipeline configurations). By raw we mean that the Make variables in these +files must not depend on any other variables because we don't want to +assume any order in reading them. It is very important to *not* define any +rule or other Make construct in any of these _configuration-Makefiles_ (see +the next paragraph for Makefiles with rules). This will enable you to set +the respective files in this directory as a prerequisite to any target that +depends on their variable values. 
Therefore, if you change any of the +values, all targets that depend on those values will be re-built. Once all the raw variables have been imported into the top Makefile, we are ready to import the Makefiles containing the details of the processing steps (Makefiles containing rules, let's call these -_workhorse-Makefiles_). But *order is important* in this phase because the +_workhorse-Makefiles_). But in this phase *order is important*, because the prerequisites of most rules will be other rules that will be defined at a lower level (not a fixed name like `paper.pdf`). The lower-level rules must be imported into Make before the higher-level ones. Hence, we can't use a @@ -284,47 +299,52 @@ these Makefiles are defined in `reproduce/src/make`, therefore, the top Makefile uses the `foreach` function to read them in a specific order. The main body of this pipeline is thus going to be managed within the -workhorse-Makefiles of `reproduce/src/make`. If you set clear-to-understand -names for these workhorse-Makefiles and follow the convention of the top -Makefile that you only include one workhorse-Makefile per line, the -`foreach` loop of the top Makefile that imports them will become very easy -to read and understand by eye. This will let you know which step you are -taking before or after another without much thought (in a few months -especially). Projects will scale up very fast. Thus if you don't start and -continue with a clean and robust management strategy, in the end it will -become very dirty and hard to manage/understand (even for yourself). As a -general rule of thumb, break your rules into as many logically-similar but -independent steps as possible. - -All processing steps ultimately (usually after many rules) end up in some -number, image, figure, or table that must be included in the paper. After -all, if you don't want to report the value of a processing, why would you -do it in the first place? Therefore if the targets in a workhorse-Makefile -aren't directly a prerequisite of other workhorse-Makefile targets, they -should be a pre-requisite of an intermediate LaTeX macro file in -`$(BDIR)/tex/macros` (the highest-level target of that workhorse-Makefile). - -The last part of the top-level Makefile is the rule to build -`tex/pipeline.tex`. This file is the connection between the processing -steps of the pipeline and the creation of the final PDF. In -`reproduce/src/make/paper.mk`, you will notice that `paper.pdf` (final -target of the whole reproduction pipeline) depends on -`tex/pipeline.tex`. This file is thus the connection of these two very -high-level different phases of the reproduction pipeline. Therefore, to -keep the over-all management clean, the rule to create this bridge between -the processing and paper-writing phases is defined in the top-level -Makefile. - -But `tex/pipeline.tex` is only a merging/concatenation of LaTeX macros -defined as the output of each high-level processing step. In some cases you -want tables and images to also be included in the final PDF. To keep these +workhorse-Makefiles that are in `reproduce/src/make`. If you set +clear-to-understand names for these workhorse-Makefiles and follow the +convention of the top Makefile that you only include one workhorse-Makefile +per line, the `foreach` loop of the top Makefile that imports them will +become very easy to read and understand by eye. This will let you know +generally which step you are taking before or after another. Projects will +scale up very fast. 
Thus if you don't start and continue with a clean and
+robust convention like this, in the end it will become very dirty and hard
+to manage/understand (even for yourself). As a general rule of thumb, break
+your rules into as many logically-similar but independent steps as
+possible.
+
+All processing steps are assumed to ultimately (usually after many rules)
+end up in some number, image, figure, or table that is to be included in
+the paper. The writing of the values into the final report is managed
+through separate LaTeX files that only contain macros (a macro is a name
+given to a number/string in the LaTeX source, which is replaced when
+compiling it to the final PDF). So usually the last target in a Makefile is
+a `.tex` file (with the same base-name as the Makefile, but in
+`$(BDIR)/tex/macros`). The rule for this intermediate TeX file only
+contains commands to fill it with the values/names that were produced in
+that Makefile. As a result, if the targets in a workhorse-Makefile aren't
+directly a prerequisite of other workhorse-Makefile targets, they should be
+a pre-requisite of that intermediate LaTeX macro file.
+
+`reproduce/src/make/paper.mk` contains the rule to build `paper.pdf` (the
+final target of the whole reproduction pipeline). If you look in it, you
+will notice that it depends on `tex/pipeline.tex`. Therefore, the last part
+of the top-level `Makefile` is the rule to build
+`tex/pipeline.tex`. `tex/pipeline.tex` is the connection between the
+processing steps of the pipeline and the creation of the final
+PDF. Therefore, to keep the over-all management clean, the rule to create
+this bridge between the two phases is defined in the top-level `Makefile`.
+
+As you see in the top-level `Makefile`, `tex/pipeline.tex` is only a
+merging/concatenation of LaTeX macros defined as the output of each
+high-level processing step (the separate workhorse-Makefiles that you
+included).
+
+One of the LaTeX macros created by `reproduce/src/make/initialize.mk` is
+`\bdir`. It is the location of the build directory. In some cases you want
+tables and images to also be included in the final PDF. To keep these
 necessary LaTeX inputs, you can define other directories under
-`$(BDIR)/tex` in the relevant workhorse-Makefile. One of the LaTeX macros
-that `reproduce/src/make/initialize.mk` creates is the location of the
-build directory, so you can easily guide LaTeX to look into the proper
-directory through the `\bdir` macro. If the target of the rule that creates
-these other LaTeX inputs isn't a prerequisite of other rules, add it as a
-pre-requisite of `tex/pipeline.tex`.
+`$(BDIR)/tex` in the relevant workhorse-Makefile. You can then easily guide
+LaTeX to look into the proper directory (for example to import an image)
+through the `\bdir` macro.
 
 During the research, it often happens that you want to test a step that is
 not a prerequisite of any higher-level operation. In such cases, you can
@@ -351,18 +371,16 @@ mind are listed below.
   workhorse-Makefiles in the top-level `Makefile`.
 
 - Do not use any constant numbers (or important names like filter names)
-  in the workhorse-Makefiles. Define such constants as logically-grouped
-  separate configuration-Makefiles in `reproduce/config/pipeline`. Then
-  set the respective configuration-Makefiles file as a pre-requisite to
-  any rule that uses the variable defined in it.
+  in the workhorse-Makefiles or paper's LaTeX source. Define such
+  constants as logically-grouped, separate configuration-Makefiles in
+  `reproduce/config/pipeline`. 
Then set the respective + configuration-Makefiles file as a pre-requisite to any rule that uses + the variable defined in it. - - To be executed, any target should either be a prerequisite of another - rule (possibly in another Makefile), or a file that is directly imported - into LaTeX as fixed macros for inclusion in text or LaTeX settings (in - `$(BDIR)/tex/macros`), images, plots or tables (in other `$(BDIR)/tex` - sub-directories). In any cases, through any number of intermediate - prerequisites, all processing steps should end in (be a prerequisite of) - `tex/pipeline.tex`. + - Through any number of intermediate prerequisites, all processing steps + should end in (be a prerequisite of) + `tex/pipeline.tex`. `tex/pipeline.tex` is the bridge between the + processing steps and PDF-building steps. @@ -415,11 +433,11 @@ advanced in later stages of your work. below. ```shell - $ ./configure # Prepare the directory structure. + $ ./configure # Set top directories and build dependencies. $ make # Run the pipeline. # Open 'paper.pdf' and see if everything is ok. - $ make clean # Remove all pipeline outputs. + $ make distclean # Remove all pipeline outputs. ``` - **Copyright**, **name** and **date**: Go over the existing scripting @@ -446,35 +464,27 @@ advanced in later stages of your work. the title and authors, please feel free to use your own methods. - **Gnuastro**: GNU Astronomy Utilities (Gnuastro) is currently a - dependency of the pipeline and without it, the pipeline will complain - and abort. The main reason for this is to demonstrate how critically - important it is to version your software. If you don't want to install - Gnuastro please follow the instructions in the list below. If you have - installed Gnuastro and tried the pipeline, but don't need Gnuastro in - your pipeline, also follow the list below. If you do want to use - Gnuastro in your pipeline, be sure to un-comment the `onlyversion` - option in `reproduce/config/gnuastro/gnuastro.conf` file and set it to - your version of Gnuastro. This will force you to keep the pipeline in - match with the version of Gnuastro you are using all the time and also - allow commits to be exactly reproducible also (for example if you - update to a new version of Gnuastro during your research project). If - you will be using Gnuastro, you can also remove the "marks" (comments) - put in the relevant files of the list below to make them more - readable. - - - Delete the description about Gnuastro in `README`. + dependency of the pipeline which will be built and used. The main + reason for this is to demonstrate how critically important it is to + version your software. If you do want to use Gnuastro in your + pipeline, be sure to un-comment the `onlyversion` option in + `reproduce/config/gnuastro/gnuastro.conf` file and set it to your + version of Gnuastro. This will force you to keep the pipeline in match + with the version of Gnuastro you are using all the time and also allow + commits to be exactly reproducible also (for example if you update to + a new version of Gnuastro during your research project). If you will + be using Gnuastro, you can also remove the "marks" (comments) put in + the relevant files of the list below to make them more readable. + - Delete marked part(s) in `configure`. - Delete marked parts in `reproduce/src/make/initialize.mk`. - Delete `and Gnuastro \gnuastroversion` from `tex/preamble-style.tex`. 
- **Other dependencies**: If there are any more of the dependencies that
   you don't use (or others that you need), then remove (or add) them in
-  the respective parts of `configure`. It is commented thoroughly and
-  reading over the comments should guide you on what to add/remove and
-  where. Note that it is always good to have an option to download the
-  necessary datasets in case the user doesn't have them. But in case
-  your pipeline doesn't need any downloads, you can also remove the
-  sections of `configure` that are for `flock` and the downloader.
+  the respective parts of `reproduce/src/make/dependencies.mk`. It is
+  commented thoroughly and reading over the comments should guide you on
+  what to add/remove and where.
 
 - **`README`**: Go through this top-level instruction file and make it fit
   to your pipeline: update the text and etc. Don't forget that your
@@ -483,25 +493,27 @@ advanced in later stages of your work.
   work. Therefore, also check and update `README` one last time when you
   are ready to publish your work (and its reproduction pipeline).
 
-  - **First input dataset**: The user manages the top-level directory of the
-  input data through the variables set in
+  - **Input dataset**: The user manages the top-level directory of the input
+  data through the variables set in
   `reproduce/config/pipeline/LOCAL.mk.in` (the user actually edits a
   `LOCAL.mk` file that is created by `configure` from the `.mk.in` file,
-  but the `.mk` file is not under version control). So open this file
-  and replace `SURVEY` in the variable name and value with the name of
-  your input survey or dataset (all in capital letters), for example if
-  you are working on data from the XDF survey, replace `SURVEY` with
-  `XDF`. Don't change anything else in the value, just the the all-caps
-  name. Afterwards, change any occurrence of `SURVEY` in the whole
-  pipeline with the new name. You can find the occurrences with a simple
-  command like the ones shown below. We follow the Make convention here
-  that all `ONLY-CAPITAL` variables are those directly set by the user
-  and all `small-caps` variables are set by the pipeline designer. All
-  variables that also depend on this survey have a `survey` in their
-  name. Hence, also correct all these occurrences to your new name in
-  small-caps. Of course, ignore those occurrences that are irrelevant,
-  like those in this file. Note that in the raw version of this template
-  no target depends on these files, so they are ignored. Afterwards, set
+  but the `.mk` file is not under version control). Datasets are usually
+  large and the users might already have their own copy (and so don't
+  need to download them). So you can define a variable (all in capital
+  letters) in `reproduce/config/pipeline/LOCAL.mk.in`. For example if you
+  are working on data from the XDF survey, use `XDF`. You can use this
+  variable to identify the location of the raw inputs on the running
+  system. Here, we'll assume its name is `SURVEY`. Afterwards, replace
+  any occurrence of `SURVEY` in the whole pipeline with the new name. You
+  can find the occurrences with a simple command like the ones shown
+  below. We follow the Make convention here that all `ONLY-CAPITAL`
+  variables are those directly set by the user and all `small-caps`
+  variables are set by the pipeline designer. All variables that also
+  depend on this survey have a `survey` in their name. Hence, also
+  correct all these occurrences to your new name in small-caps. 
Of + course, ignore/delete those occurrences that are irrelevant, like + those in this file. Note that in the raw version of this template no + target depends on these files, so they are ignored. Afterwards, set the webpage and correct the filenames in `reproduce/src/make/download.mk` if necessary. @@ -824,38 +836,30 @@ future. - *Containers*: It is important to have better/full control of the environment of the reproduction pipeline. Our current reproducible - paper pipeline simply assumes that the necessary software are already - installed on the host system. So it ignores details of how they are - built or the versions of their dependencies (which is not good). As in - [zenodo.1164774](https://doi.org/10.5281/zenodo.1164774) or - [zenodo.1163746](https://doi.org/10.5281/zenodo.1163746), the best we - can currently do is distribute the tarballs of the necessary - software. The solution here is based on [an interesting + paper pipeline builds the higher-level programs (for example GNU Bash, + GNU Make, GNU AWK and etc) it needs and sets `PATH` to prefer its own + builds. It currently doesn't build and use its own version of + lower-level tools (like the C library and compiler). We plan to add the + build steps of these low level tools so the system's `PATH' can be + completely ignored within the pipeline and we are in full control of + the whole build process. Another solution is based on [an interesting tutorial](https://mozillafoundation.github.io/2017-fellows-sf/re-papers/index.html) by the Mozilla science lab to build reproducible papers. It suggests using the [Nix package manager](https://nixos.org/nix/about.html) to build the necessary software for the pipeline and run the pipeline in - its completely closed environment. This is a great solution because - using Nix or [Guix](https://www.gnu.org/software/guix/) (which is based - on Nix, but uses the [Scheme + its completely closed environment. This is an interesting solution + because using Nix or [Guix](https://www.gnu.org/software/guix/) (which + is based on Nix, but uses the [Scheme language](https://en.wikipedia.org/wiki/Scheme_(programming_language)), not a custom language for the management) will allow a fully working closed environment on the host system which contains the instructions - on how to build the environment. These package managers can also - co-exist with the package manager of the host's operating system and - they allow separate versions of a software to be present. Thus it is - not necessary to change existing programs on the host system (that have - been updated for example) to run a particular reproduction - pipeline. The availability of the instructions to build the programs - and environment with Nix or Guix, makes them a better solution than - binary containers like [docker](https://www.docker.com/) which are - essentially just a binary (not human readable) black box and only - usable on the given CPU architecture. The initial running of Nix or - Guix and setting up of the environment can also be included in a - `Makefile` of this pipeline, and thus be fully automatic. The software - tarballs (to be used by Nix or Guix) can also be uploaded/archived, as - we do now. These package managers can then be instructed to get the - tarballs for building the environment from there. + on how to build the environment. 
The availability of the instructions + to build the programs and environment with Nix or Guix, makes them a + better solution than binary containers like + [docker](https://www.docker.com/) which are essentially just a binary + (not human readable) black box and only usable on the given CPU + architecture. However, one limitation of using these is their own + installation (which usually requires root access). diff --git a/configure b/configure index 331c029..d9c2ef6 100755 --- a/configure +++ b/configure @@ -25,11 +25,23 @@ +# Script settings +# --------------- +# Stop the script if there are any errors. +set -e + + + + + # Important internal locations # ---------------------------- # # These are defined to help make this script more readable. +topdir=$(pwd) +installedlink=.local cdir=reproduce/config + pdir=$cdir/pipeline pconf=$pdir/LOCAL.mk ptconf=$pdir/LOCAL_tmp.mk @@ -40,31 +52,22 @@ glconf=$cdir/gnuastro/gnuastro-local.conf -# Check mandatory dependencies -# ---------------------------- +# Inform the user +# --------------- # -# The list of program names you need for this pipeline is in the `for' loop -# below. In case you don't need Gnuastro, then remove `astnoisechisel' from -# the list. -echo "---------------------" -echo "Checking dependencies" -echo "---------------------" -for prog in bash cat sed make awk grep flock astnoisechisel pdflatex biber; do - if type $prog > /dev/null; then - echo " '$prog' was found." - else - echo - echo "ERROR: '$prog' not found in search path." - if [ $prog = "flock" ]; then - echo - echo "'flock' (file-lock, used for managing parallel operations)" - echo "is available on GNU/Linux OSs through your package manager," - echo "please install it. For other OSs, you can install the " - echo "implementation at: https://github.com/discoteq/flock" - fi - exit 1 - fi -done +# Print some basic information so the user gets a feeling of what is going +# on and is prepared on what will happen next. +echo +echo "-----------------------------------------" +echo "Reproduction pipeline local configuration" +echo "-----------------------------------------" +echo +echo "Local configuration includes things like top-level directories," +echo "or processing steps." +echo +echo "It is STRONGLY recommended to read the comments, and set the best " +echo "values for your system (where necessary)." +echo @@ -85,47 +88,17 @@ if type curl > /dev/null; then elif type wget > /dev/null; then downloader="wget -O"; else - echo echo "=======" echo "Warning" echo "=======" - echo "Couldn't find any of the 'curl' or 'wget' programs. They are used for" - echo "downloading necessary data if they aren't already present in the" - echo "specified directories. Therefore the pipeline will crash if the" - echo "necessary data are not already present on the system." + echo "Couldn't find any of the 'curl' or 'wget' programs. They are used" + echo "for downloading necessary programs and data if they aren't already" + echo "present in the specified directories. Therefore the pipeline will" + echo "crash if the necessary files are not already present on the system." echo "=======" echo downloader="no-downloader-found" - print_downloader_notice=0 fi; -if [ $print_downloader_notice = 1 ]; then - prog=$(echo "$downloader" | awk '{print $1}') - echo " '$prog' will be used for downloading files if necessary." -fi - - - - - -# If `LOCAL.mk' already exists -# ---------------------------- -# -# `LOCAL.mk' is the top-most local configuration for the pipeline. 
If it
-# already exists when this script is run, we'll copy it to a `LOCAL.mk.old'
-# file as backup. For example the user might have ran `./configure' by
-# mistake.
-if [ -f $pconf ]; then
-    if mv $pconf $poconf; then
-        echo
-        echo "======="
-        echo "WARNING"
-        echo "======="
-        echo "  Existing configuration moved to '$poconf'."
-        echo
-    else
-        exit 1
-    fi
-fi




@@ -136,71 +109,60 @@ fi
 #
 # We'll start writing of the local configuration file with the values that
 # have been found so far.
-sed -e 's|@downloader[@]|'"$downloader"'|g' \
-    $pconf.in > $ptconf
+sed -e 's|@downloader[@]|'"$downloader"'|g' $pconf.in > $ptconf




-# Inform the user
-# ---------------
+# Remove possibly existing configuration file?
+# --------------------------------------------
 #
-# Print some basic information so the user gets a feeling of what is going
-# on and is prepared on what will happen next.
-echo
-echo "-----------------------------------------"
-echo "Reproduction pipeline local configuration"
-echo "-----------------------------------------"
-echo
-echo "Local settings include things like top-level directories,"
-echo "or processing steps."
-echo
-echo "Pressing 'y' will open the local settings file in an editor"
-echo "so you can modify the default values if you want. Each"
-echo "variable is thoroughly described in the comments (lines"
-echo "starting with a '#') above it."
-echo
-echo "It is strongly recommended to inspect/edit/set the best "
-echo "values for your system (where necessary)."
-echo
-while [ "$userread" != "y" -a "$userread" != "n" ]
-do
-    read -p"Edit the default local configuration (y/n)? " userread
-done
+# `LOCAL.mk' is the top-most local configuration for the pipeline. If it
+# already exists when this script is run, we'll make a copy of it as backup
+# (for example the user might have run `./configure' by mistake).
+if [ -f $pconf ]; then
+    # If it already exists, see what the user wants to do.
+    while [ "$userread" != "y" -a "$userread" != "n" ]
+    do
+        read -p"A configuration already exists, re-configure (y/n)? " userread
+    done
+
+    # Move the configuration if requested.
+    if [ $userread = "y" ]; then
+        if mv $pconf $poconf; then
+            echo "-- Existing configuration moved to '$poconf'."
+        fi
+    else
+        echo "-- Using existing configuration file."
+    fi
+    echo
+fi




-# Let user to edit local settings
-# -------------------------------
+
+# Let user edit local settings
+# ----------------------------
 #
-# We'll open a text editor so the user can read the comments of the
-# necessary local settings and set the top directories manually.
-if [ $userread = "y" ]; then
-
-    # Open a text editor to set the given directories
-    if emacs $ptconf; then ready=1
-    elif gedit $ptconf; then ready=1
-    elif vi $ptconf; then ready=1
+# If the configuration file exists at this stage, the user didn't want to
+# change it. So, we'll open a text editor for the user to read the comments
+# of the necessary local settings and set the top directories manually.
+if ! [ -f $pconf ]; then
+
+    if [ x"$EDITOR" = "x" ]; then
+        userread=not-an-editor
     else
-        echo
-        echo "================="
-        echo "IMPORTANT WARNING"
-        echo "================="
-        echo "No common text editor was found on this system."
-        echo "Please set the values in the following files manually:"
-        echo "  - $pconf"
-        # --------- Delete for no Gnuastro ---------
-        echo "  - $glconf"
-        # ------------------------------------------
-        echo "================="
-        echo
-        ready=0
+        userread=$EDITOR
     fi
-else
-    ready=1
+
+    while ! $userread $ptconf > /dev/null 2>&1
+    do
+        read -p"Your favorite text editor: " userread
+    done
+    echo
 fi




@@ -252,13 +214,92 @@ echo " minmapsize $mm" >> $glconf
 #
 # Make the final file that will be used and delete the temporary file along
 # with a possible file ending with `~' that is put there by some editors.
-create_file_with_notice $pconf
-cat $ptconf >> $pconf
+if ! [ -f $pconf ]; then
+    create_file_with_notice $pconf
+    cat $ptconf >> $pconf
+fi
 rm -f $ptconf $ptconf"~"




+# Read the necessary directories and build the top build directory as well
+# as the symbolic link to it.
+lbdir=reproduce/build
+rm -f $lbdir
+
+bdir=$(awk '/BDIR =/ {print $3}' $pconf)
+ddir=$(awk '/DEPENDENCIES-DIR =/ {print $3}' $pconf)
+if ! [ -d $bdir ]; then mkdir $bdir; fi
+absolutebdir=$(readlink -f $bdir)
+ln -s $absolutebdir $lbdir
+
+depdir=$absolutebdir/dependencies
+if ! [ -d $depdir ]; then mkdir $depdir; fi
+
+tardir=$depdir/tarballs
+if ! [ -d $tardir ]; then mkdir $tardir; fi
+
+instdir=$depdir/installed
+if ! [ -d $instdir ]; then mkdir $instdir; fi
+
+rm -f $installedlink
+ln -s $instdir $installedlink
+
+echo "Build directory ready: $absolutebdir";
+echo "... 'reproduce/build' is a symbolic link to it (for easy access).";
+echo




+# Build Bash and Make (after a short notice)
+# ------------------------------------------
+tsec=10
+echo;
+echo;
+echo "----------------";
+echo "Necessary reproduction pipeline dependencies will be built in $tsec sec."
+echo
+echo "NOTE: the built software will NOT BE INSTALLED on your system. It is"
+echo "only for local use by this reproduction pipeline."
+echo
+sleep $tsec
+export USE_LOCAL_BASH=no
+bindir=$bdir/dependencies/installed/bin
+make -j2 -f reproduce/src/make/dependencies.mk $bindir/bash $bindir/make




+# Build the final symbolic links
+cd $topdir
+rm -f Makefile
+ln -s $(pwd)/reproduce/src/make/Top-Makefile Makefile
+# --------- Delete for no Gnuastro ---------
+rm -f .gnuastro
+ln -s $(pwd)/reproduce/config/gnuastro .gnuastro
+# ------------------------------------------




+# Build all the necessary dependencies
+# ------------------------------------
+#
+# We will be making all the dependencies before running the top-level
+# Makefile. To make the job easier, we'll do it in a Makefile, not a
+# script. Bash and Make were the tools we need to run Makefiles, so we had
+# to build them in this script. But after this, we can rely on Makefiles.
+export USE_LOCAL_BASH=yes
+.local/bin/make -f reproduce/src/make/dependencies.mk -j8




 # Print a final notice
 # --------------------
@@ -266,16 +307,13 @@ rm -f $ptconf $ptconf"~"
 # The configuration is now complete, we can inform the user on the next
 # step(s) to take.
 echo
-if [ $ready = 1 ]; then
-    echo "The reproduction pipeline has been configured for this system."
-    echo "Please run the following command to start the pipeline:"
-else
-    echo "AFTER MANUALLY EDITING THE FILE(S) ABOVE, please run the following"
-    echo "commands to run the pipeline."
-fi
-echo "(Replace '8' with the number of CPU threads available)"
+echo "------"
+echo "The reproduction pipeline and its environment are SUCCESSFULLY configured."
+echo "Please run the following command to start."
+echo
+echo "(Replace '8' with the number of CPU threads)"
 echo
-echo "    make -j8"
+echo "    .local/bin/make -j8"
 echo
 echo "To change the configuration later, please re-run './configure',"
 echo "DO NOT manually edit the relevant files."
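For orientation before the `LOCAL.mk.in' hunks below: the `LOCAL.mk' that
`./configure' generates from it is a plain Make configuration file. The
values in the following sketch are hypothetical and only for illustration;
note the single blank space around `=', which the `awk '/BDIR =/ ...''
extraction in `configure' above relies on.

    # Sketch of a generated LOCAL.mk (illustrative values only).
    DEPENDENCIES-DIR = /home/user/tarballs
    BDIR = /home/user/build/my-pipeline
    MINMAPSIZE = 1000000000
    DOWNLOADER = wget -O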
diff --git a/reproduce/config/pipeline/LOCAL.mk.in b/reproduce/config/pipeline/LOCAL.mk.in
index 7a29344..02f8b11 100644
--- a/reproduce/config/pipeline/LOCAL.mk.in
+++ b/reproduce/config/pipeline/LOCAL.mk.in
@@ -22,17 +22,17 @@



-# Input data
-# ----------
+# (OPTIONAL) Dependencies directory
+# ---------------------------------
 #
-# This is where the input data (with the same file-name standard as the
-# online webpage) are stored. If this directory doesn't exist, or it
-# doesn't contain the files (with the correct file-name formats), it will
-# be created and the images will be downloaded. See
-# `reproduce/config/pipeline/web.mk', for the URLs containing the expected
-# inputs for each survey.
-SURVEY = reproduce/SURVEY
-
+# To ensure an identical build environment, the pipeline will use its own
+# build of the programs it needs. Therefore the tarballs of the relevant
+# programs are necessary for this pipeline. If a tarball isn't present in
+# the specified directory, it will be downloaded by the pipeline; in that
+# case an internet connection is mandatory.
+#
+# Important note: Keep at least one blank space before and after `='.
+DEPENDENCIES-DIR = /optional/path/to/directory/containing/tarballs



@@ -47,7 +47,9 @@ SURVEY = reproduce/SURVEY
 # intermediate/derivative files. Also to make synchronization and backups
 # more easy: the contents of the build directory do not need to be backed
 # up since they can be reproduced and they can be large.
-BDIR = reproduce/BDIR
+#
+# IMPORTANT NOTE: Keep at least one blank space before and after `='.
+BDIR = /path/of/directory/for/building



@@ -76,9 +78,11 @@ MINMAPSIZE = 1000000000
 # Downloader program
 # ------------------
 #
-# The downloder program (and its output option name) that will be used if
-# any of the necessary datasets aren't already available on the
-# system. This is usually set at an early stage of the configuration system
+# The downloader program (and its output option name, for example `wget -O'
+# or `curl -o') that will be used if any of the necessary datasets aren't
+# already available on the system.
+#
+# This is usually set at an early stage of the configuration system
 # automatically before the file is opened for editing by the user. It is
 # thus recommended to not modify it manually.
 DOWNLOADER = @downloader@
diff --git a/reproduce/config/pipeline/dependency-versions.mk b/reproduce/config/pipeline/dependency-versions.mk
new file mode 100644
index 0000000..3d9d8b3
--- /dev/null
+++ b/reproduce/config/pipeline/dependency-versions.mk
@@ -0,0 +1,15 @@
+# Versions of the various dependencies
+bash-version = 4.4.18
+cfitsio-version = 3450
+coreutils-version = 8.30
+gawk-version = 4.2.1
+ghostscript-version = 9.25
+gnuastro-version = 0.7.58-e72a
+grep-version = 3.1
+gsl-version = 2.5
+libjpeg-version = v9b
+libgit2-version = 0.26.0
+libtool-version = 2.4.6
+make-version = 4.2.90
+sed-version = 4.5
+wcslib-version = 6.2
diff --git a/reproduce/src/make/Top-Makefile b/reproduce/src/make/Top-Makefile
new file mode 100644
index 0000000..5d94766
--- /dev/null
+++ b/reproduce/src/make/Top-Makefile
@@ -0,0 +1,108 @@
+# A ONE-LINE DESCRIPTION OF THE WHOLE PIPELINE
+#
+# Original author:
+#     Mohammad Akhlaghi
+# Contributing author(s):
+#     Your name
+# Copyright (C) 2018, Your Name.
+#
+# This Makefile is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# A copy of the GNU General Public License is available at +# . + + + + + +# Ultimate target of this pipeline +# -------------------------------- +# +# The final paper (in PDF format) is the main target of this whole +# reproduction pipeline. So as defined in the Make paradigm, we are +# defining it here. +# +# Note that if you don't have LaTeX to build the PDF, or generally are just +# interested in the processing, you can skip create the final PDF creation +# with `pdf-build-final' of `reproduce/config/pipeline/pdf-build.mk'. +all: paper.pdf + + + + + +# Include specific Makefiles +# -------------------------- +# +# To keep things clean, managable and readable, each set of operations is +# (and must be) classified (modularized) by context into separate +# Makefiles: the more the better. They are included in this top-level +# Makefile through the command below. +# +# To further help in readability, it is best to avoid including Makefiles +# within any other Makefile. So in short, it is best that the `foreach' +# loop below contains all the `reproduce/src/make/*.mk' files. +# +# IMPORTANT NOTE: order matters in the inclusion of the processing +# Makefiles. As the pipeline grows, some Makefiles will probably define +# variables/dependencies that others need. Therefore unlike the +# `reproduce/config/pipeline/*.mk' Makefiles which only define low-level +# variables (not dependent on other variables and contain no rules), the +# high-level processing Makefiles are included through the `foreach' loop +# below by explicitly requesting them in a specific order here. +include reproduce/config/pipeline/*.mk +include $(foreach f, initialize \ + download \ + delete-me \ + paper \ + , reproduce/src/make/$(f).mk) + + + + + +# LaTeX macros for paper +# ---------------------- +# +# The final report's PDF (final target of this reproduction pipeline) takes +# variable strings from the pipeline. Those variables are defined as LaTeX +# macros in `tex/pipeline.tex'. This file is thus the interface between the +# pipeline scripts and the final PDF. +# +# Each of the pipeline steps will save their macros into their own `.tex' +# file in the `$(mtexdir)' directory. Those individual macros are the +# pre-requisite to `tex/pipeline.txt'. `tex/pipeline.tex' is thus a +# high-level output and is defined in this top-most Makefile (and not +# `reproduce/src/make/paper.mk'). This enables a clear demonstration of the +# top-level dependencies clearly. +# +# Note that if you don't want the final PDF and just want the processing +# and file outputs, you can remove the value of the `pdf-build-final' +# variable in `reproduce/config/pdf-build.mk'. +tex/pipeline.tex: $(foreach f, initialize \ + download \ + delete-me \ + , $(mtexdir)/$(f).tex) + + # If no PDF is requested, then just exit here. +ifeq ($(pdf-build-final),) + @echo + @echo + @echo "-----" + @echo "Everything is OK until this point, but not building PDF." + @echo "To do so, give a value to the 'pdf-build-final' variable." + @echo "It is defined in 'reproduce/config/pipeline/pdf-build.mk'." + @echo + @exit 1 +endif + + # Merge all the TeX macros that are prepared for building the PDF. 
+ @cat $(mtexdir)/*.tex > $@ diff --git a/reproduce/src/make/delete-me.mk b/reproduce/src/make/delete-me.mk index de72873..67f0440 100644 --- a/reproduce/src/make/delete-me.mk +++ b/reproduce/src/make/delete-me.mk @@ -63,10 +63,10 @@ $(mtexdir)/delete-me.tex: $(dm) # Here, we are first using AWK to find the minimum and maximum # values, then using it again to read each separately to use in the # macro definition. - mm=$$(awk 'BEGIN{min=99999; max=-min} \ - {if($$2>max) max=$$2; if($$2> $@; \ - v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); \ + mm=$$(awk 'BEGIN{min=99999; max=-min} + {if($$2>max) max=$$2; if($$2> $@; + v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); echo "\newcommand{\deletememax}{$$v}" >> $@ diff --git a/reproduce/src/make/dependencies.mk b/reproduce/src/make/dependencies.mk new file mode 100644 index 0000000..0fb5a34 --- /dev/null +++ b/reproduce/src/make/dependencies.mk @@ -0,0 +1,254 @@ +# Build the reproduction pipeline dependencies (programs and libraries). +# +# ------------------------------------------------------------------------ +# !!!!! IMPORTANT NOTES !!!!! +# +# This Makefile will be run by the initial `./configure' script. It is not +# included into the reproduction pipe after that. +# +# This Makefile also builds GNU Bash and GNU Make. Therefore this is the +# only Makefile in the reproduction pipeline where you MUST NOT assume that +# GNU Bash or GNU Make are to be used. +# +# ------------------------------------------------------------------------ +# +# Original author: +# Mohammad Akhlaghi +# Contributing author(s): +# Your name +# Copyright (C) 2018, Your Name. +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# A copy of the GNU General Public License is available at +# . + + + +# Top level environment +include reproduce/config/pipeline/LOCAL.mk +include reproduce/config/pipeline/dependency-versions.mk + +ddir = $(BDIR)/dependencies +tdir = $(BDIR)/dependencies/tarballs +idir = $(BDIR)/dependencies/installed +ibdir = $(BDIR)/dependencies/installed/bin +ildir = $(BDIR)/dependencies/installed/lib + +# Define the top-level programs to build (installed in `.local/bin', so for +# Coreutils, only one of its executables is enough). +top-level-programs = ls gawk gs grep libtool sed astnoisechisel +all: $(foreach p, $(top-level-programs), $(ibdir)/$(p)) + +# This Makefile will be called to also build Bash locally. So when we don't +# have it yet, we'll have to use the system's bash. +ifeq ($(USE_LOCAL_BASH),yes) +SHELL := $(ibdir)/bash +else +SHELL := /bin/sh +endif + +# Other basic environment settings. +.ONESHELL: +.SHELLFLAGS = -ec +PATH := $(ibdir):$(PATH) +LDFLAGS := -L$(ildir) $(LDFLAGS) +CPPFLAGS := -I$(idir)/include $(CPPFLAGS) +LD_LIBRARY_PATH := $(ildir):$(LD_LIBRARY_PATH) + + + + + +# Tarballs +# -------- +# +# All the necessary tarballs are defined and prepared with this rule. 
+tarballs = $(foreach t, bash-$(bash-version).tar.gz \ + cfitsio$(cfitsio-version).tar.gz \ + coreutils-$(coreutils-version).tar.xz \ + gawk-$(gawk-version).tar.gz \ + ghostscript-$(ghostscript-version).tar.gz \ + gnuastro-$(gnuastro-version).tar.gz \ + grep-$(grep-version).tar.xz \ + gsl-$(gsl-version).tar.gz \ + jpegsrc.$(libjpeg-version).tar.gz \ + libtool-$(libtool-version).tar.gz \ + libgit2-$(libgit2-version).tar.gz \ + sed-$(sed-version).tar.xz \ + make-$(make-version).tar.gz \ + wcslib-$(wcslib-version).tar.bz2 \ + , $(tdir)/$(t) ) +$(tarballs): $(tdir)/%: + if [ -f $(DEPENDENCIES-DIR)/$* ]; then + cp $(DEPENDENCIES-DIR)/$* $@ + else + # Remove all numbers, `-' and `.' from the tarball name so we can + # search more easily only with the program name. + n=$$(echo $* | sed -e's/[0-9\-]/ /g' -e's/\./ /g' \ + | awk '{print $$1}' ) + + # Set the top download link of the requested tarball. + if [ $$n = bash ]; then w=http://ftp.gnu.org/gnu/bash + elif [ $$n = cfitsio ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = coreutils ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = gawk ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = ghostscript ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = gnuastro ]; then w=http://akhlaghi.org + elif [ $$n = grep ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = gsl ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = jpegsrc ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = libtool ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = libgit ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = sed ]; then w=WWWWWWWWWWWWWWWW + elif [ $$n = make ]; then w=http://akhlaghi.org + elif [ $$n = wcslib ]; then w=WWWWWWWWWWWWWWWW + else + echo; echo; echo; + echo "'$$n' not recognized as a dependency name to download." + echo; echo; echo; + exit 1 + fi + + # Download the requested tarball. + $(DOWNLOADER) $@ $$w/$* + fi + + + + + +# Customized build +# ---------------- +# +# Programs that need some customization on their build. +# For CFITSIO we'll need to intervene manually to remove the check on +# libcurl (which can be real trouble in this controlled environment). +$(ildir)/libcfitsio.a: $(ibdir)/ls \ + $(tdir)/cfitsio$(cfitsio-version).tar.gz + # Same as before + cd $(ddir) + tar xf $(tdir)/cfitsio$(cfitsio-version).tar.gz + cd cfitsio + + # Remove the part that checks for the CURL library, so it assumes + # that the CURL library wasn't found. + awk 'NR<4785 || NR>4847' configure > new_configure + mv new_configure configure + chmod +x configure + + # Do the standard configuring and building + ./configure CFLAGS=--static --disable-shared --prefix=$(idir) + make; make install; + cd ..; rm -rf cfitsio + + +# Why not shared: Gnuastro's configure can't link with it in static mode. +$(ildir)/libgit2.a: $(tdir)/libgit2-$(libgit2-version).tar.gz + cd $(ddir) + tar xf $(tdir)/libgit2-$(libgit2-version).tar.gz + cd libgit2-$(libgit2-version) + mkdir build + cd build + export CFLAGS="--static $$CFLAGS" + cmake .. -DUSE_SSH=OFF -DUSE_OPENSSL=OFF -DBUILD_SHARED_LIBS=OFF \ + -DBUILD_CLAR=OFF -DTHREADSAFE=ON + cmake --build . + cmake .. -DCMAKE_INSTALL_PREFIX=$(idir) + cmake --build . --target install + cd ../.. + rm -rf libgit2-$(libgit2-version) + + + + + +# GNU Build system programs +# ------------------------- +# +# Programs that use the basic GNU build system. 
+
+
+
+
+# GNU Build system programs
+# -------------------------
+#
+# Programs that use the basic GNU build system.
+gbuild = cd $(ddir); tar xf $(tdir)/$(1); cd $(2); \
+ if [ $(3)x = staticx ]; then \
+ opts="CFLAGS=--static --disable-shared"; \
+ fi; \
+ ./configure $$opts $(4) --prefix=$(idir); make $(5); \
+ check="$(6)"; if [ x"$$check" != x ]; then $$check; fi; \
+ make install; cd ..; rm -rf $(2)
+
+$(ibdir)/bash: $(tdir)/bash-$(bash-version).tar.gz
+ $(call gbuild,$(subst $(tdir),,$<), bash-$(bash-version), static)
+
+
+# Unfortunately GNU Make needs dynamic linking in two instances: when
+# loading objects (dynamically linked libraries), or when using the
+# `getpwnam' function (for tilde expansion). The first can be disabled with
+# `--disable-load', but unfortunately I don't know any way to fix the
+# second. So, we'll have to build it dynamically for now.
+$(ibdir)/make: $(tdir)/make-$(make-version).tar.gz
+ $(call gbuild,$(subst $(tdir),,$<), make-$(make-version))
+
+
+$(ibdir)/ls: $(tdir)/coreutils-$(coreutils-version).tar.xz
+ $(call gbuild,$(subst $(tdir),,$<), coreutils-$(coreutils-version), \
+ static)
+
+
+$(ibdir)/gawk: $(tdir)/gawk-$(gawk-version).tar.gz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), gawk-$(gawk-version), static)
+
+
+$(ibdir)/sed: $(tdir)/sed-$(sed-version).tar.xz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), sed-$(sed-version), static)
+
+
+$(ibdir)/grep: $(tdir)/grep-$(grep-version).tar.xz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), grep-$(grep-version), static)
+
+
+$(ibdir)/libtool: $(tdir)/libtool-$(libtool-version).tar.gz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), libtool-$(libtool-version), static)
+
+
+$(ildir)/libgsl.a: $(tdir)/gsl-$(gsl-version).tar.gz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), gsl-$(gsl-version), static)
+
+
+$(ildir)/libwcs.a: $(tdir)/wcslib-$(wcslib-version).tar.bz2 \
+ $(ildir)/libcfitsio.a
+ $(call gbuild,$(subst $(tdir),,$<), wcslib-$(wcslib-version), , \
+ LIBS="-pthread -lcurl -lm" --without-pgplot \
+ --disable-fortran)
+
+
+$(ibdir)/gs: $(tdir)/ghostscript-$(ghostscript-version).tar.gz \
+ $(ibdir)/ls
+ $(call gbuild,$(subst $(tdir),,$<), ghostscript-$(ghostscript-version))
+
+
+$(ildir)/libjpeg.a: $(tdir)/jpegsrc.$(libjpeg-version).tar.gz
+ $(call gbuild,$(subst $(tdir),,$<), jpeg-9b, static)
+
+
+$(ibdir)/astnoisechisel: $(tdir)/gnuastro-$(gnuastro-version).tar.gz \
+ $(ildir)/libgsl.a \
+ $(ildir)/libcfitsio.a \
+ $(ildir)/libwcs.a \
+ $(ibdir)/gs \
+ $(ildir)/libjpeg.a \
+ $(ildir)/libgit2.a \
+
+ $(call gbuild,$(subst $(tdir),,$<), gnuastro-$(gnuastro-version), \
+ static, , -j8, make check -j8)
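
# A usage sketch (hypothetical, not part of this commit): a new program that
# follows the plain GNU build system would only need its tarball added to
# the `tarballs' list, a download address in the tarball rule, a version in
# `dependency-versions.mk', its name in `top-level-programs', and a rule in
# the pattern of those above. The package name `hello' and the
# `hello-version' variable are made-up examples to show that pattern.
$(ibdir)/hello: $(tdir)/hello-$(hello-version).tar.gz \
                $(ibdir)/ls
	$(call gbuild,$(subst $(tdir),,$<), hello-$(hello-version), static)
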
diff --git a/reproduce/src/make/initialize.mk b/reproduce/src/make/initialize.mk
index f615e22..165db78 100644
--- a/reproduce/src/make/initialize.mk
+++ b/reproduce/src/make/initialize.mk
@@ -47,36 +47,24 @@ pconfdir = reproduce/config/pipeline
-# Sanity check
-# ------------
+# High level environment
+# ----------------------
 #
-# We need to make sure that the `./configure' command has already been
-# run. The output of `./configure' is the `$(pconfdir)/LOCAL.mk' file and
-# this is the non-time-stamp prerequisite of $(BDIR), see below.
+# We want the full recipe to be executed in one call to the shell. Also we
+# want Make to run the specific version of Bash that we have installed
+# during `./configure' time.
 #
-# There is one problem however: if the user hasn't run `./configure' yet,
-# then `BDIR' isn't defined (will just evaluate to blank space). Therefore
-# it won't appear in the prerequisites and the pipeline will try to build
-# the other directories in the top root directory (`/'). To solve this
-# problem, when `BDIR' isn't defined, we'll define it with a place-holder
-# name (only so it won't evaluate to blank space). Note that this
-# directory will never be built.
-ifeq ($(BDIR),)
-configure-run = no
-BDIR = reproduce/BDIR
-else
-configure-run = yes
-endif
-$(pconfdir)/LOCAL.mk:
- @echo
- @echo "================================================================"
- @echo "For the pipeline's local settings, please run this command first"
- @echo "(P.S. this local configuration is only necessary one time)"
- @echo
- @echo " $$ ./configure"
- @echo "================================================================"
- @echo
- @exit 1
+# Regarding the directories, this pipeline builds its major dependencies
+# itself and doesn't use the local system's default tools. With these
+# environment variables, we are setting it to prefer the software we have
+# built here.
+.ONESHELL:
+.SHELLFLAGS = -ec
+SHELL := .local/bin/bash
+PATH := .local/bin:$(PATH)
+LDFLAGS := -L.local/lib $(LDFLAGS)
+CPPFLAGS := -I.local/include $(CPPFLAGS)
+LD_LIBRARY_PATH := .local/lib:$(LD_LIBRARY_PATH)
@@ -103,52 +91,32 @@ $(pconfdir)/LOCAL.mk:
 # are looking for in this pipeline.
 .SUFFIXES:
 $(tikzdir): | $(texbdir); mkdir $@
-$(BDIR): | $(pconfdir)/LOCAL.mk; mkdir $@
 $(texdir) $(lockdir): | $(BDIR); mkdir $@
 $(mtexdir) $(texbdir): | $(texdir); mkdir $@
-# Symbolic link to build directory
-# --------------------------------
-#
-# Besides $(BDIR), we are also making a symbolic link to it for easy
-# access. Recall that it is recommended that the actual build directory be
-# in a completely separate part of the file system (a place that may easily
-# be completely deleted).
-#
-# Note that $(BDIR) might not be an absolute path and this will complicate
-# the symbolic link creation. To be generic, we'll first call `readlink' to
-# make sure we have an absolute address, then we'll make a symbolic link to
-# that.
-reproduce/build: | $(BDIR)
- absbdir=$$(readlink -f $(BDIR)); \
- ln -s $$absbdir $@
-
-
-
-
 # High-level Makefile management
 # ------------------------------
 #
 # About `.PHONY': these are targets that must be built even if a file with
-# their name exists. Most don't correspond to a file, but those that do are
-# included here ensure that the file is always built in every run: for
-# example the pipeline versions may change within two separate runs, so we
-# want it to be rebuilt every time.
+# their name exists.
+#
+# Only `$(mtexdir)/initialize.tex' corresponds to a file. This is because
+# we want to ensure that the file is always built in every run: it contains
+# the pipeline version which may change between two separate runs, even
+# when no file actually differs.
 .PHONY: all clean distclean clean-mmap $(mtexdir)/initialize.tex
-distclean: clean; rm -f $(pconfdir)/LOCAL.mk
 # --------- Delete for no Gnuastro ---------
 clean-mmap:; rm -f reproduce/config/gnuastro/mmap*
 # ------------------------------------------
 clean: clean-mmap
-ifeq ($(configure-run),yes)
 rm -rf $(BDIR)
-endif
 rm -f reproduce/build *.pdf *.log *.out *.aux *.auxlock
-
+distclean: clean
+ rm -f Makefile $(pconfdir)/LOCAL.mk .gnuastro
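
# A short usage note on the cleaning targets above (shell commands run from
# the top project directory; the descriptions only paraphrase the recipes,
# they are not additional rules):
make clean       # deletes the build directory and the LaTeX/PDF products
                 # left in the top directory
make distclean   # additionally deletes Makefile, LOCAL.mk and .gnuastro,
                 # presumably the files that `./configure' generates
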
@@ -163,12 +131,12 @@ endif
 $(mtexdir)/initialize.tex: | $(mtexdir)
 # Version of the pipeline.
- @v=$$(git describe --dirty --always); \
+ @v=$$(git describe --dirty --always);
 echo "\newcommand{\pipelineversion}{$$v}" > $@
 # --------- Delete for no Gnuastro ---------
 # Version of Gnuastro.
- @v=$$(astnoisechisel --version | awk 'NR==1{print $$NF}'); \
+ @v=$$(astnoisechisel --version | awk 'NR==1{print $$NF}');
 echo "\newcommand{\gnuastroversion}{$$v}" >> $@
 # ------------------------------------------
diff --git a/reproduce/src/make/paper.mk b/reproduce/src/make/paper.mk
index 844f157..79d7722 100644
--- a/reproduce/src/make/paper.mk
+++ b/reproduce/src/make/paper.mk
@@ -40,10 +40,10 @@ $(texbdir)/paper.bbl: tex/references.tex \
 # We'll run LaTeX first to generate the `.bcf' file (necessary for
 # `biber') and then run `biber' to generate the `.bbl' file.
- p=$$(pwd); \
- export TEXINPUTS=$$p:$$TEXINPUTS; \
- cd $(texbdir); \
- pdflatex -shell-escape -halt-on-error $$p/paper.tex; \
+ p=$$(pwd);
+ export TEXINPUTS=$$p:$$TEXINPUTS;
+ cd $(texbdir);
+ pdflatex -shell-escape -halt-on-error $$p/paper.tex;
 biber paper
@@ -61,9 +61,13 @@ $(texbdir)/paper.bbl: tex/references.tex \
 paper.pdf: tex/pipeline.tex paper.tex $(texbdir)/paper.bbl \
 | $(tikzdir) $(texbdir)
- # Make the report.
- p=$$(pwd); \
- export TEXINPUTS=$$p:$$TEXINPUTS; \
- cd $(texbdir); \
- pdflatex -shell-escape -halt-on-error $$p/paper.tex
+ # Go into the top TeX build directory and make the paper.
+ p=$$(pwd)
+ export TEXINPUTS=$$p:$$TEXINPUTS
+ cd $(texbdir)
+ pdflatex -shell-escape -halt-on-error $$p/paper.tex
+
+ # Come back to the top pipeline directory and copy the built PDF
+ # file here.
+ cd $$p
 cp $(texbdir)/$@ $@
--
cgit v1.2.1
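
# For reference, the recipes above implement the usual pdflatex/biber cycle.
# Done by hand it would look roughly like the following sketch, where
# `$texbdir' stands for the Makefiles' $(texbdir) LaTeX build directory
# (its actual value is not spelled out here) and the final run only exists
# to pull the bibliography into the PDF.
export TEXINPUTS=$(pwd):$TEXINPUTS
cd "$texbdir"
pdflatex -shell-escape -halt-on-error "$OLDPWD"/paper.tex   # writes paper.bcf
biber paper                                                 # writes paper.bbl
pdflatex -shell-escape -halt-on-error "$OLDPWD"/paper.tex   # final paper.pdf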