diff options
Diffstat (limited to 'configure')
-rwxr-xr-x | configure | 329 |
1 files changed, 243 insertions, 86 deletions
@@ -22,9 +22,6 @@ # <http://www.gnu.org/licenses/>. - - - # Script settings # --------------- # Stop the script if there are any errors. @@ -34,13 +31,138 @@ set -e -# Important internal locations -# ---------------------------- +# Default option values +jobs=0 +build_dir= +input_dir= +software_dir= +existing_conf=0 +minmapsize=10000000000 + + + + + +# Output of --help +# ---------------- +me=$0 # Executable file name. +print_help() { + # Print the output. + cat <<EOF +Usage: $me [OPTION]... + +Configure the reproducible paper template for this system (set local +settings for this system). The local settings can be given on the +command-line through the options below. If not, the configure script will +interactively ask for a value to each one (with basic necessary background +information printed before them). Alternatively, if you have already +configured this script for your system, you can use the '--existing-conf' +to use its values directly. + +RECOMMENDATION: If this is the first time you are running this pipeline, +please don't use the options and let the script explain each parameter in +full detail by simply running './configure'. + +The only mandatory value for this script is the local build directory. This +is where all the pipeline's outputs will be stored. Optionally, you can +also provide directories that host input data, or software source codes. If +the necessary files don't exist there, the template will automatically +download them. + +With the options below you can modify the default behavior. Just note that +you should not put an '=' sign between an option name and its value. + +Configure options: + Top-level directory settings: + -b, --build-dir=STR Top directory to build the project in. + -i, --input-dir=STR Directory containing input datasets (optional). + -s, --software-dir=STR Directory containing necessary software tarballs. + + Operating mode options: + -m, --minmapsize=INT (Gnuastro) Minimum number of bytes to use RAM. 
+ -j, --jobs=INT Number of threads to build the software. + -e, --existing-conf Use (possibly existing) local configuration. + -h, --help Print this help list. + +Mandatory or optional arguments to long options are also mandatory or optional +for any corresponding short options. + +Reproducible paper template: https://gitlab.com/makhlaghi/reproducible-paper + +Report bugs to mohammad@akhlaghi.org +EOF +} + + + + + +# Functions to check option values and complain if necessary. +function on_off_option_error() { + cat <<EOF +$scriptname: '$1' doesn't take any values. +EOF + exit 1 +} + +function check_v() { + if [ x"$2" = x ]; then + echo "$scriptname: option '$1' requires an argument." + echo "Try '$scriptname --help' for more information." + exit 1; + fi +} + + + + + +# Separate command-line arguments from options. Then put the option +# value into the respective variable. +# +# Each option has two lines because we want to process both these formats: +# `--name=value' and `--name value'. The former (with `=') is a single +# command-line argument, so we just need to shift the counter by one. The +# latter (without `=') is two arguments, so we'll need two shifts. +while [[ $# -gt 0 ]] +do + case $1 in + # Input parameters. + -b=*|--build-dir=*) build_dir="${1#*=}"; check_v $1 "$build_dir"; shift;; + -b|--builddir) build_dir="$2"; check_v $1 "$build_dir"; shift;shift;; + -i=*|--inputdir=*) input_dir="${1#*=}"; check_v $1 "$input_dir"; shift;; + -i|--inputdir) input_dir="$2"; check_v $1 "$input_dir"; shift;shift;; + -s=*|--software-dir=*) software_dir="${1#*=}"; check_v $1 "$software_dir"; shift;; + -s|--software-dir) software_dir="$2"; check_v $1 "$software_dir"; shift;shift;; + -m=*|--minmapsize=*) minmapsize="${1#*=}"; check_v $1 "$minmapsize"; shift;; + -m|--minmapsize) minmapsize="$2"; check_v $1 "$minmapsize"; shift;shift;; + + # Operating mode options. 
+ -j=*|--jobs=*) jobs="${1#*=}"; check_v $1 "$jobs"; shift;; + -j|--jobs) jobs="$2"; check_v $1 "$jobs"; shift;shift;; + -e=*|--existing-conf=*) existing_conf="${1#*=}"; check_v $1 "$existing_conf"; shift;; + -e|--existing-conf) existing_conf="$2"; check_v $1 "$existing_conf"; shift;shift;; + -?|--help) print_help; exit 0;; + + # Unrecognized option: + -*) echo "$scriptname: unknown option '$1'"; exit 1;; + + # Not an option, an argument. + *) echo "The configure script doesn't accept arguments."; exit 1;; + esac +done + + + + + +# Internal directories +# -------------------- # # These are defined to help make this script more readable. topdir=$(pwd) +lbdir=.build installedlink=.local -lbdir=reproduce/build cdir=reproduce/config optionaldir="/optional/path" @@ -141,22 +263,11 @@ printnotice=yes rewritepconfig=yes rewritegconfig=yes if [ -f $pconf ] || [ -f $glconf ]; then - - # If it already exits, see what the user wants to do. - echo "Atleast one local configuration file already exists." - echo - while [ "$userread" != "y" -a "$userread" != "n" ] - do - read -p"Re-write existing configuration file(s) (y/n)? " userread - done - - # Set `rewriteconfig'. - if [ $userread = "n" ]; then + if [ $existing_conf = 1 ]; then printnotice=no if [ -f $pconf ]; then rewritepconfig=no; fi if [ -f $glconf ]; then rewritegconfig=no; fi fi - echo fi @@ -220,10 +331,10 @@ if [ $rewritepconfig = yes ]; then !!!!!!!!!!!!!!!!!!!!!! Warning !!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Couldn't find GNU Wget. It is used for downloading necessary programs and -data if they aren't already present in the specified directories. Therefore -the pipeline will crash if the necessary files are not already present on -the system. +Couldn't find GNU Wget, or cURL on this system. These programs are used for +downloading necessary programs and data if they aren't already present (in +directories that you can specify with this configure script). 
Therefore if +the necessary files are not present, the pipeline will crash. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -245,12 +356,13 @@ if [ $rewritepconfig = yes ]; then Build directory =============== -The "source" (this directory) and "build" directories are treated +The project's "source" (this directory) and "build" directories are treated separately. This greatly helps in managing the many intermediate files that are created during the build. The intermediate build files don't need to be -archived or backed up: you can always re-build them with this reproduction -pipeline. The build directory also needs a relatively large amount of free -space (atleast serveral Giga-bytes). +archived or backed up: you can always re-build them with the contents of +the source directory. The build directory also needs a relatively large +amount of free space (atleast serveral Giga-bytes), while the source +directory (all plain text) will usually be a mega-byte or less. '$lbdir' (a symbolic link to the build directory) will also be created during this configuration. It can help encourage you to set the actual @@ -258,30 +370,36 @@ build directory in a very different address from this one (one that can be deleted and has large volume), while having easy access to it from here. EOF - bdir="" + bdir= junkname=pure-junk-974adfkj38 while [ x$bdir == x ] do - # Ask the user. - read -p"Please enter the top build directory: " inbdir + # Ask the user (if not already set on the command-line). + if [ x"$build_dir" = x ]; then + read -p"Please enter the top build directory: " build_dir + fi # If it exists, see if we can write in it. If not, try making it. 
- if [ -d $inbdir ]; then - if mkdir $inbdir/$junkname 2> /dev/null; then - bdir=$(absolute_dir $inbdir) + if [ -d $build_dir ]; then + if mkdir $build_dir/$junkname 2> /dev/null; then + bdir=$(absolute_dir $build_dir) echo " -- Build directory: '$bdir'" - rm -rf $inbdir/$junkname + rm -rf $build_dir/$junkname else - echo " -- Can't write in '$inbdir'" + echo " -- Can't write in '$build_dir'" fi else - if mkdir $inbdir 2> /dev/null; then - bdir=$(absolute_dir $inbdir) + if mkdir $build_dir 2> /dev/null; then + bdir=$(absolute_dir $build_dir) echo " -- Build directory set to (the newly created): '$bdir'" else - echo " -- Can't create '$inbdir'" + echo " -- Can't create '$build_dir'" fi fi + + # Reset `build_dir' to blank, so it continues asking when the + # previous value wasn't usable. + build_dir= done fi @@ -291,12 +409,16 @@ fi # Input directory # --------------- -indir=$optionaldir +if [ x"$input_dir" = x ]; then + indir=$optionaldir +else + indir=$input_dir +fi wfpc2name=$(awk '!/^#/ && $1=="WFPC2IMAGE" {print $3}' $pdir/INPUTS.mk) wfpc2md5=$(awk '!/^#/ && $1=="WFPC2MD5" {print $3}' $pdir/INPUTS.mk) wfpc2size=$(awk '!/^#/ && $1=="WFPC2SIZE" {print $3}' $pdir/INPUTS.mk) wfpc2url=$(awk '!/^#/ && $1=="WFPC2URL" {print $3}' $pdir/INPUTS.mk) -if [ $rewritepconfig = yes ]; then +if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then cat <<EOF ---------------------------------- @@ -338,12 +460,16 @@ fi # Dependency tarball directory # ---------------------------- -if [ $rewritepconfig = yes ]; then +if [ x"$software_dir" = x ]; then ddir=$optionaldir +else + ddir=$software_dir +fi +if [ $rewritepconfig = yes ] && [ x"$software_dir" = x ]; then cat <<EOF --------------------------------------- -(OPTIONAL) Dependency tarball directory +(OPTIONAL) Software tarball directory --------------------------------------- To ensure an identical build environment, the pipeline will use its own @@ -367,42 +493,6 @@ fi -# Memory mapping minimum size -# 
--------------------------- -if [ $rewritegconfig = yes ]; then - defaultminmapsize=10000000000 - minmapsize=$defaultminmapsize - cat <<EOF - ---------------------------- -Minimum memory mapping size ---------------------------- - -Some programs (for example Gnuastro) can deal with cases where the local -system doesn't have enough memory (RAM) to keep large files. For example, -they will create memory-mapped (mmap) files on the HDD or SSD and -read/write to/from them instead of RAM. This will ofcourse, slow down the -processing, but atleast the program won't crash. - -Since the memory requirements of different systems are different and it has -no effect on the software's final result, the minimum size of an allocated -array to warrant a mapping to HDD/SSD instead of RAM must also be defined -here. This value will be used in the programs that support this feature. - -EOF - - read -p"Minimum memory mapping size in bytes (default: $minmapsize): " \ - tmpminmapsize - if [ x"$tmpminmapsize" != x ]; then - minmapsize=$tmpminmapsize - echo " -- Using '$minmapsize'" - fi -fi - - - - - # Write the parameters into the local configuration file. if [ $rewritepconfig = yes ]; then @@ -418,8 +508,12 @@ if [ $rewritepconfig = yes ]; then $pconf.in >> $pconf else # Read the values from existing configuration file. - inbdir=$(awk '$1=="BDIR" {print $3}' $pconf) - downloader=$(awk '$1=="DOWNLOADER" {print $3}' $pconf) + inbdir=$(awk '$1=="BDIR" {print $3}' $pconf) + + # The downloader command may contain multiple elements, so we'll just + # change the (in memory) first and second tokens to empty space and + # write the full line (the original file is unchanged). 
+ downloader=$(awk '$1=="DOWNLOADER" {$1=""; $2=""; print $0}' $pconf) # Make sure all necessary variables have a value err=0 @@ -676,16 +770,79 @@ fi +# Build `flock' as first program +# ------------------------------ +# +# Flock (or file-lock) is a unique program in the pipeline that is +# necessary to serialize the (generally parallel) processing of make when +# necessary. GNU/Linux machines have it as part of their `util-linux' +# programs. But to be consistent, we will be using our own build. +# +# The reason it's special is that we need it to serialize the download +# process of the dependency tarballs. +flockversion=$(awk '/flock-version/{print $3}' \ + reproduce/config/pipeline/dependency-versions.mk) +flocktar=flock-$flockversion.tar.gz +flockurl=http://github.com/discoteq/flock/releases/download/v$flockversion/ + +# Prepare/download the tarball. +if ! [ -f $tardir/$flocktar ]; then + if [ -f $ddir/$flocktar ]; then + cp $ddir/$flocktar $tardir/$flocktar + else + if ! $downloader $tardir/$flocktar $flockurl/$flocktar; then + rm -f $tardir/$flocktar; + echo + echo "DOWNLOAD ERROR: Couldn't download the 'flock' tarball:" + echo " $flockurl" + echo + echo "You can manually place it in '$ddir' to avoid downloading." + exit 1 + fi + fi +fi + +# If the tarball is newer than the (possibly existing) program, then delete +# the program. +if [ -f .local/bin/flock ]; then + if [ $tardir/$flocktar -nt .local/bin/flock ]; then + rm .local/bin/flock + fi +fi + +# Build `flock' if necessary. +if ! [ -f .local/bin/flock ]; then + cd $depdir + tar xf $tardir/$flocktar + cd flock-$flockversion + ./configure --prefix=$instdir + make; make install + cd $topdir + rm -rf $depdir/flock-$flockversion +fi + + + + + # Build Basic dependencies # ------------------------ # -# Since the system might not have GNU Coreutils at this stage, we'll just -# default to 4 threads if the actual number isn't found. 
This is because -# some versions of Make complain about not having enough 'pipe' (memory) on -# some systems. After some searching, I found out its because of too many -# threads. -if which nproc > /dev/null 2>/dev/null; then numthreads=$(nproc --all); -else numthreads=1; +# Since the system might not have GNU Make at this stage, and other Make +# implementations can't deal with parallel build properly, we'll just +# default to 1 thread. This is because some versions of Make complain about +# not having enough 'pipe' (memory) on some systems. After some searching, +# I found out it's because of too many threads. GNU Make will be present on +# GNU systems (that have `nproc', part of GNU Coreutils). So to simplify +# the test for GNU Make, we'll just try running `nproc'. +if which nproc > /dev/null 2>/dev/null; then + if [ $jobs = 0 ]; then + numthreads=$(nproc --all); + else + numthreads=$jobs + fi +else + numthreads=1; fi make -f reproduce/src/make/dependencies-basic.mk \ rpath_command=$rpath_command \ |