#!/bin/sh
#
# Necessary preparations/configurations for the reproducible project.
#
# Copyright (C) 2018-2020 Mohammad Akhlaghi
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script. If not, see <http://www.gnu.org/licenses/>.




# Script settings
# ---------------
# Stop the script if there are any errors.
set -e




# Project-specific settings
# -------------------------
#
# The variables defined here may be different between different
# projects. Ideally, they should be detected automatically, but we haven't
# had the chance to implement it yet (please help if you can!). Until then,
# please set them based on your project (if they differ from the core
# branch).
need_gfortran=0




# Internal directories
# --------------------
#
# These are defined to help make this script more readable.
topdir=$(pwd)
optionaldir="/optional/path"
adir=reproduce/analysis/config
cdir=reproduce/software/config

# Local-configuration files (current, temporary and old copies) and the
# software version/checksum databases, all under the software config
# directory.
pconf=$cdir/LOCAL.conf
ptconf=$cdir/LOCAL_tmp.conf
poconf=$cdir/LOCAL_old.conf
depverfile=$cdir/versions.conf
depshafile=$cdir/checksums.conf




# Notice for top of generated files
# ---------------------------------
#
# In case someone opens the files output from the configuration scripts in
# a text editor and wants to edit them, it is important to let them know
# that their changes are not going to be permanent.
create_file_with_notice () {
    # Write the "do not edit" notice to the top of file `$1'. The first
    # `echo' doubles as a writability test: if the destination can't be
    # created, complain (on stderr) and abort the configuration.
    if echo "# IMPORTANT: file can be RE-WRITTEN after './project configure'" > "$1"
    then
        echo "#"                                                      >> "$1"
        echo "# This file was created during configuration"           >> "$1"
        echo "# ('./project configure'). Therefore, it is not under"  >> "$1"
        echo "# version control and any manual changes to it will be" >> "$1"
        echo "# over-written if the project re-configured."           >> "$1"
        echo "#"                                                      >> "$1"
    else
        echo 1>&2; echo "Can't write to $1" 1>&2; echo 1>&2
        exit 1
    fi
}




# Get absolute address
# --------------------
#
# Since the build directory will go into a symbolic link, we want it to be
# an absolute address. With this function we can make sure of that. The
# `stat' call checks that the argument actually exists (its output and
# error streams are discarded: the caller prints its own diagnostic on
# failure).
absolute_dir () {
    if stat "$1" 1> /dev/null 2> /dev/null; then
        echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")"
    else
        exit 1
    fi
}




# Check file permission handling (POSIX-compatibility)
# ----------------------------------------------------
#
# Check if a `given' directory handles permissions as expected.
#
# This is to prevent a known bug in the NTFS filesystem that prevents
# proper installation of Perl, and probably some other packages. This
# function receives the directory as an argument and then, creates a dummy
# file, and examines whether the given directory handles the file
# permissions as expected.
#
# Returns `0' if everything is fine, and `1' otherwise, so it can be used
# directly as a shell truth-value test.
#
# ===== CAUTION! =====
#
# Since there is a `set -e' before running this function, the whole script
# stops and exits IF the `check_permission' (or any other function) returns
# anything OTHER than `0'! So, only use this function as a test. Here's a
# minimal example:
#
#     if $(check_permission $some_directory) ; then
#         echo "yay"; else "nay";
#     fi ;
check_permission () {
    # Make a `junk' file, activate its executable flag and record its
    # permissions generally. All expansions are quoted so a directory
    # name containing spaces doesn't break the test.
    local junkfile="$1/check_permission_tmp_file"
    rm -f "$junkfile"
    echo "Don't let my short life go to waste" > "$junkfile"
    chmod +x "$junkfile"
    local perm_before=$(ls -l "$junkfile" | awk '{print $1}')

    # Now, remove the executable flag and record the permissions.
    chmod -x "$junkfile"
    local perm_after=$(ls -l "$junkfile" | awk '{print $1}')

    # Clean up before leaving the function.
    rm -f "$junkfile"

    # If the permissions are equal, the filesystem doesn't allow
    # permissions.
    if [ "$perm_before" = "$perm_after" ]; then
        # Setting permission FAILED
        return 1
    else
        # Setting permission SUCCESSFUL
        return 0
    fi
}




# Check if there is enough free space available in the build directory
# --------------------------------------------------------------------
#
# Use this function to check if there is enough free space in a
# directory. It is meant to be passed to the 'if' statement in the
# shell. So if there is enough space, it returns 0 (which translates to
# TRUE), otherwise, the function returns 1 (which translates to FALSE).
#
# Expects to be called with two arguments, the first is the threshold and
# the second is the desired directory. The 'df' function checks the given
# path to see where it is mounted on, and how much free space there is on
# that partition (in units of 1024 bytes).
#
# synopsis:
#     $ free_space_warning <threshold> <path>
#
# example:
# To check if there is 5MB of space available in /path/to/check
# call the command with arguments as shown below:
#     $ free_space_warning 5000 /path/to/check/free/space
free_space_warning() {
    fs_threshold=$1
    fs_destpath="$2"
    # NOTE: `df's second output row, fourth column is the available space
    # (1024-byte units). The threshold is passed into awk with `-v' (not
    # spliced into the program text). The printed 0/1 becomes the return
    # status: 0 (TRUE) means the warning should be shown.
    return $(df "$fs_destpath" \
                 | awk -v threshold="$fs_threshold" \
                       'FNR==2 {if($4>threshold) print 1; \
                                else            print 0; }')
}




# See if we are on a Linux-based system
# --------------------------------------
#
# Some features are tailored to GNU/Linux systems, while the BSD-based
# behavior is different. Initially we only tested macOS (hence the name of
# the variable), but as FreeBSD is also being included in our tests.
As # more systems get used, we need to tailor these kinds of things better. kernelname=$(uname -s) if [ x$kernelname = xLinux ]; then on_mac_os=no # Don't forget to add the respective C++ compiler below (leave 'cc' in # the end). c_compiler_list="gcc clang cc" else host_cc=1 on_mac_os=yes # Don't forget to add the respective C++ compiler below (leave 'cc' in # the end). c_compiler_list="clang gcc cc" fi # Check for C/C++ compilers # ------------------------- # # To build the software, we'll need some basic tools (the C/C++ compilers # in particular) to be present. has_compilers=no for c in $c_compiler_list; do # Set the respective C++ compiler. if [ x$c = xcc ]; then cplus=c++; elif [ x$c = xgcc ]; then cplus=g++; elif [ x$c = xclang ]; then cplus=clang++; else cat < /dev/null 2>/dev/null; then export CC=$c; if type $cplus > /dev/null 2>/dev/null; then export CXX=$cplus has_compilers=yes break fi fi done if [ x$has_compilers = xno ]; then cat < $testsource < #include int main(void){printf("...C compiler works.\n"); return EXIT_SUCCESS;} EOF if $CC $testsource -o$testprog && $testprog; then rm $testsource $testprog else rm $testsource cat < $testsource < #include int main(void) { void *handle=dlopen ("/lib/CEDD_LIB.so.6", RTLD_LAZY); return 0; } EOF if $CC $testsource -o$testprog 2>/dev/null > /dev/null; then needs_ldl=no; else needs_ldl=yes; fi # See if the C compiler can build static libraries # ------------------------------------------------ # # We are manually only working with shared libraries: because some # high-level programs like Wget and cURL need dynamic linking and if we # build the libraries statically, our own builds will be ignored and these # programs will go and find their necessary libraries on the host system. # # Another good advantage of shared libraries is that we can actually use # the shared library tool of the system (`ldd' with GNU C Library) and see # exactly where each linked library comes from. 
But in static building, # unless you follow the build closely, its not easy to see if the source of # the library came from the system or our build. static_build=no # Print warning if the host CC is to be used. if [ x$host_cc = x1 ]; then cat <&1 \ | tr ' ' '\n' \ | awk '/\-\-target/' \ | sed -e's/\-\-target=//') if [ x"$gcctarget" != x ]; then if [ -f /usr/lib/$gcctarget/libc.a ]; then export sys_library_path=/usr/lib/$gcctarget export sys_cpath=/usr/include/$gcctarget fi fi # For a check: #echo "sys_library_path: $sys_library_path" #echo "sys_cpath: $sys_cpath" fi # See if a link-able static C library exists # ------------------------------------------ # # A static C library and the `sys/cdefs.h' header are necessary for # building GCC. if [ x"$host_cc" = x0 ]; then echo; echo; echo "Checking if static C library is available..."; cat > $testsource < #include #include int main(void){printf("...yes\n"); return EXIT_SUCCESS;} EOF cc_call="$CC $testsource $CPPFLAGS $LDFLAGS -o$testprog -static -lc" if $cc_call && $testprog; then gccwarning=0 rm $testsource $testprog else echo; echo "Compilation command:"; echo "$cc_call" rm $testsource gccwarning=1 host_cc=1 cat < /dev/null 2>/dev/null; then hasfc=1; fi if [ $hasfc = 0 ]; then cat < $testsource echo " END" >> $testsource if gfortran $testsource -o$testprog && $testprog; then rm $testsource $testprog else rm $testsource cat < /dev/null 2>/dev/null; then name=$(which wget) # By default Wget keeps the remote file's timestamp, so we'll have # to disable it manually. downloader="$name --no-use-server-timestamps -O"; elif type curl > /dev/null 2>/dev/null; then name=$(which curl) # - cURL doesn't keep the remote file's timestamp by default. # - With the `-L' option, we tell cURL to follow redirects. 
downloader="$name -L -o" else cat < /dev/null; then instring="the already existing" bdir=$(absolute_dir $build_dir) rm -rf $build_dir/$junkname else echo " ** Can't write in '$build_dir'"; fi else if mkdir $build_dir 2> /dev/null; then instring="the newly created" bdir=$(absolute_dir $build_dir) else echo " ** Can't create '$build_dir'"; fi fi # If its given, make sure it isn't a subdirectory of the source # directory. if ! [ x"$bdir" = x ]; then if echo "$bdir/" \ | grep '^'$currentdir 2> /dev/null > /dev/null; then # If it was newly created, it will be empty, so delete it. if ! [ "$(ls -A $bdir)" ]; then rm --dir $bdir; fi # Inform the user that this is not acceptable and reset `bdir'. bdir= echo " ** The build-directory cannot be under the source-directory." fi fi # If everything is fine until now, see if we're able to manipulate # file permissions. if ! [ x"$bdir" = x ]; then if ! $(check_permission $bdir); then # Unable to handle permissions well bdir= echo " ** File permissions can't be modified in this directory" else # Able to handle permissions, now check for 5GB free space # in the given partition (note that the number is in units # of 1024 bytes). If this is not the case, print a warning. if $(free_space_warning 5000000 $bdir); then echo " !! LESS THAN 5GB FREE SPACE IN: $bdir" echo " !! We recommend choosing another partition." echo " !! Build will continue in 5 seconds..." sleep 5 fi fi fi # If the build directory was good, the loop will stop, if not, # reset `build_dir' to blank, so it continues asking for another # directory and let the user know that they must select a new # directory. if [ x$bdir = x ]; then build_dir= echo " ** Please select another directory." 
echo "" else echo " -- Build directory set to ($instring): '$bdir'" fi done fi # Input directory # --------------- if [ x"$input_dir" = x ]; then indir=$optionaldir else indir=$input_dir fi noninteractive_sleep=2 if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then cat <> $pconf else # Read the values from existing configuration file. inbdir=$(awk '$1=="BDIR" {print $3}' $pconf) # Read the software directory. ddir=$(awk '$1=="DEPENDENCIES-DIR" {print $3}' $pconf) # The downloader command may contain multiple elements, so we'll just # change the (in memory) first and second tokens to empty space and # write the full line (the original file is unchanged). downloader=$(awk '$1=="DOWNLOADER" {$1=""; $2=""; print $0}' $pconf) # Make sure all necessary variables have a value err=0 verr=0 novalue="" if [ x"$inbdir" = x ]; then novalue="BDIR, "; fi if [ x"$downloader" = x ]; then novalue="$novalue"DOWNLOADER; fi if [ x"$novalue" != x ]; then verr=1; err=1; fi # Make sure `bdir' is an absolute path and it exists. berr=0 ierr=0 bdir=$(absolute_dir $inbdir) if ! [ -d $bdir ]; then if ! mkdir $bdir; then berr=1; err=1; fi; fi if [ $err = 1 ]; then cat <8GB) is large enough for the parallel building of the software. # # For the name of the directory under `/dev/shm' (for this project), we'll # use the names of the two parent directories to the current/running # directory, separated by a `-' instead of `/'. We'll then appended that # with the user's name (in case multiple users may be working on similar # project names). Maybe later, we can use something like `mktemp' to add # random characters to this name and make it unique to every run (even for # a single user). tmpblddir=$sdir/build-tmp rm -rf $tmpblddir/* $tmpblddir # If its a link, we need to empty its # contents first, then itself. # Set the top-level shared memory location. 
if [ -d /dev/shm ]; then shmdir=/dev/shm else shmdir="" fi # If a shared memory mounted directory exists and there is enough space # there (in RAM), build a temporary directory for this project. needed_space=2000000 if [ x"$shmdir" != x ]; then available_space=$(df $shmdir | awk 'NR==2{print $4}') if [ $available_space -gt $needed_space ]; then dirname=$(pwd | sed -e's/\// /g' \ | awk '{l=NF-1; printf("%s-%s",$l, $NF)}') tbshmdir=$shmdir/"$dirname"-$(whoami) if ! [ -d $tbshmdir ]; then mkdir $tbshmdir; fi fi else tbshmdir="" fi # If a shared memory directory was created set `build-tmp' to be a # symbolic link to it. Otherwise, just build the temporary build # directory under the project build directory. if [ x$tbshmdir = x ]; then mkdir $tmpblddir; else ln -s $tbshmdir $tmpblddir; fi # Inform the user that the build process is starting # ------------------------------------------------- if [ $printnotice = yes ]; then tsec=10 cat < /dev/null 2> /dev/null; then numthreads=$(nproc --all); else numthreads=$(sysctl -a | awk '/^hw\.ncpu/{print $2}') if [ x"$numthreads" = x ]; then numthreads=1; fi fi else numthreads=$jobs fi # See if the linker accepts -Wl,-rpath-link # ----------------------------------------- # # `-rpath-link' is used to write the information of the linked shared # library into the shared object (library or program). But some versions of # LLVM's linker don't accept it an can cause problems. # # IMPORTANT NOTE: This test has to be done **AFTER** the definition of # 'instdir', otherwise, it is going to be used as an empty string. cat > $testsource < #include int main(void) {return EXIT_SUCCESS;} EOF if $CC $testsource -o$testprog -Wl,-rpath-link 2>/dev/null > /dev/null; then export rpath_command="-Wl,-rpath-link=$instdir/lib" else export rpath_command="" fi # Delete the compiler testing directory # ------------------------------------- # # This directory was made above to make sure the necessary compilers can be # run. 
rm -f $testprog $testsource rm -rf $compilertestdir # Paths needed by the host compiler (only for `basic.mk') # ------------------------------------------------------- # # At the end of the basic build, we need to build GCC. But GCC will build # in multiple phases, making its own simple compiler in order to build # itself completely. The intermediate/simple compiler doesn't recognize # some system specific locations like `/usr/lib/ARCHITECTURE' that some # operating systems use. We thus need to tell the intermediate compiler # where its necessary libraries and headers are. if [ x"$sys_library_path" != x ]; then if [ x"$LIBRARY_PATH" = x ]; then export LIBRARY_PATH="$sys_library_path" else export LIBRARY_PATH="$LIBRARY_PATH:$sys_library_path" fi if [ x"$CPATH" = x ]; then export CPATH="$sys_cpath" else export CPATH="$CPATH:$sys_cpath" fi fi # Find Zenodo URL for software downloading # ---------------------------------------- # # All free-software source tarballs that are potentially used in Maneage # are also archived in Zenodo with a certain concept-DOI. A concept-DOI is # a Zenodo terminology, meaning a fixed DOI of the project (that can have # many sub-DOIs for different versions). By default, the concept-DOI points # to the most recently uploaded version. However, the concept-DOI itself is # not directly usable for downloading files. The concept-DOI will just take # us to the top webpage of the most recent version of the upload. # # The problem is that as more software are added (as new Zenodo versions), # the most recent Zenodo-URL that the concept-DOI points to, also # changes. The most reliable solution was found to be the tiny script below # which will download the DOI-resolved webpage, and extract the Zenodo-URL # of the most recent version from there (using the 'coreutils' tarball as # an example, the directory part of the URL for all the other software are # the same). 
user_backup_urls="" zenodocheck=.build/software/zenodo-check.html if $downloader $zenodocheck https://doi.org/10.5281/zenodo.3883409; then zenodourl=$(sed -n -e'/coreutils/p' $zenodocheck \ | sed -n -e'/http/p' \ | tr ' ' '\n' \ | grep http \ | sed -e 's/href="//' -e 's|/coreutils| |' \ | awk 'NR==1{print $1}') else zenodourl="" fi rm -f $zenodocheck # Add the Zenodo URL to the user's given back software URLs. Since the user # can specify 'user_backup_urls' (not yet implemented as an option in # './project'), we'll give preference to their specified servers, then add # the Zenodo URL afterwards. user_backup_urls="$user_backup_urls $zenodourl" # Build core tools for project # ---------------------------- # # Here we build the core tools that 'basic.mk' depends on: Lzip # (compression program), GNU Make (that 'basic.mk' is written in), Dash # (minimal Bash-like shell) and Flock (to lock files and enable serial # download). ./reproduce/software/shell/pre-make-build.sh \ "$bdir" "$ddir" "$downloader" "$user_backup_urls" # Build other basic tools our own GNU Make # ---------------------------------------- # # When building these software we don't have our own un-packing software, # Bash, Make, or AWK. In this step, we'll install such low-level basic # tools, but we have to be very portable (and use minimal features in all). echo; echo "Building necessary software (if necessary)..." .local/bin/make -k -f reproduce/software/make/basic.mk \ user_backup_urls="$user_backup_urls" \ sys_library_path=$sys_library_path \ rpath_command=$rpath_command \ static_build=$static_build \ numthreads=$numthreads \ needs_ldl=$needs_ldl \ on_mac_os=$on_mac_os \ host_cc=$host_cc \ -j$numthreads # All other software # ------------------ # # We will be making all the dependencies before running the top-level # Makefile. To make the job easier, we'll do it in a Makefile, not a # script. Bash and Make were the tools we need to run Makefiles, so we had # to build them in this script. 
But after this, we can rely on Makefiles. if [ $jobs = 0 ]; then numthreads=$(.local/bin/nproc --all) else numthreads=$jobs fi .local/bin/env -i HOME=$bdir \ .local/bin/make -k -f reproduce/software/make/high-level.mk \ user_backup_urls="$user_backup_urls" \ sys_library_path=$sys_library_path \ rpath_command=$rpath_command \ all_highlevel=$all_highlevel \ static_build=$static_build \ numthreads=$numthreads \ on_mac_os=$on_mac_os \ sys_cpath=$sys_cpath \ host_cc=$host_cc \ -j$numthreads # Make sure TeX Live installed successfully # ----------------------------------------- # # TeX Live is managed over the internet, so if there isn't any, or it # suddenly gets cut, it can't be built. However, when TeX Live isn't # installed, the project can do all its processing independent of it. It # will just stop at the stage when all the processing is complete and it is # only necessary to build the PDF. So we don't want to stop the project's # configuration and building if its not present. if [ -f $itidir/texlive-ready-tlmgr ]; then texlive_result=$(cat $itidir/texlive-ready-tlmgr) else texlive_result="NOT!" fi if [ x"$texlive_result" = x"NOT!" ]; then cat <0 { \ c++; \ if(c==1) \ { \ if('$num'==1) printf("%s", $0); \ else printf("%s", $0); \ } \ else if(c=='$num') printf(" and %s\n", $0); \ else printf(", %s", $0) \ }' fi } # Import the context/sentences for placing between the list of software # names during their acknowledgment. . $cdir/software_acknowledge_context.sh # Report the different software in separate contexts (separating Python and # TeX packages from the C/C++ programs and libraries). proglibs=$(prepare_name_version $verdir/proglib/*) pymodules=$(prepare_name_version $verdir/python/*) texpkg=$(prepare_name_version $verdir/tex/texlive) # Acknowledge these software packages in a LaTeX paragraph. pkgver=$mtexdir/dependencies.tex # Add the text to the ${pkgver} file. .local/bin/echo "$thank_software_introduce " > $pkgver .local/bin/echo "$thank_progs_libs $proglibs. 
" >> $pkgver if [ x"$pymodules" != x ]; then .local/bin/echo "$thank_python $pymodules. " >> $pkgver fi .local/bin/echo "$thank_latex $texpkg. " >> $pkgver .local/bin/echo "$thank_software_conclude" >> $pkgver # Prepare the BibTeX entries for the used software (if there are any). hasentry=0 bibfiles="$ictdir/*" for f in $bibfiles; do if [ -f $f ]; then hasentry=1; break; fi; done; # Make sure we start with an empty output file. pkgbib=$mtexdir/dependencies-bib.tex echo "" > $pkgbib # Fill it in with all the BibTeX entries in this directory. We'll just # avoid writing any comments (usually copyright notices) and also put an # empty line after each file's contents to make the output more readable. if [ $hasentry = 1 ]; then for f in $bibfiles; do awk '!/^%/{print} END{print ""}' $f >> $pkgbib done fi # Clean the temporary build directory # --------------------------------- # # By the time the script reaches here the temporary software build # directory should be empty, so just delete it. Note `tmpblddir' may be a # symbolic link to shared memory. So, to work in any scenario, first delete # the contents of the directory (if it has any), then delete `tmpblddir'. .local/bin/rm -rf $tmpblddir/* $tmpblddir # Register successful completion # ------------------------------ echo `.local/bin/date` > $finaltarget # Final notice # ------------ # # The configuration is now complete, we can inform the user on the next # step(s) to take. if [ x$reproducible_paper_group_name = x ]; then buildcommand="./project make -j8" else buildcommand="./project make --group=$reproducible_paper_group_name -j8" fi cat <