diff options
author | Mohammad Akhlaghi <mohammad@akhlaghi.org> | 2019-02-06 18:08:19 +0000 |
---|---|---|
committer | Mohammad Akhlaghi <mohammad@akhlaghi.org> | 2019-02-06 18:08:19 +0000 |
commit | 1c508e636b90ae170213ccf71771711156dd8f52 (patch) | |
tree | 0e9c6e306fc231a92497275d621e21d53737ad48 /reproduce/src | |
parent | 33e00f02d4ecd28ea5084fc553d2ad182a11ca52 (diff) |
Wrapper script for multiple attempts at downloading inputs
Until now, downloading was treated similarly to any other operation in the
Makefile: if it crashes, the pipeline would crash. But network errors
aren't like processing errors: attempting to download a second time will
probably not crash (network relays are very complex and not reproducible
and packets get lost all the time)!
This is usually not felt in downloading one or two files, but when
downloading many thousands of files, it will happen every once in a while
and it's a real waste of time until you check, just to press enter again!
With this commit we have the `reproduce/src/bash/download-multi-try.sh'
script in the pipeline which will repeat the download several times (with
increasing time intervals) before crashing and thus fix the problem.
Diffstat (limited to 'reproduce/src')
-rwxr-xr-x | reproduce/src/bash/download-multi-try.sh | 108 | ||||
-rw-r--r-- | reproduce/src/make/download.mk | 4 |
2 files changed, 110 insertions, 2 deletions
diff --git a/reproduce/src/bash/download-multi-try.sh b/reproduce/src/bash/download-multi-try.sh
new file mode 100755
index 0000000..31490c9
--- /dev/null
+++ b/reproduce/src/bash/download-multi-try.sh
@@ -0,0 +1,108 @@
+#!.local/bin/bash
+#
+# Try downloading multiple times before crashing whole pipeline
+#
+#   $ ./download-multi-try downloader lockfile input-url downloaded-name
+#
+# Due to temporary network problems, a download may fail suddenly, but
+# succeed in a second try a few seconds later. Without this script that
+# temporary glitch in the network will permanently crash the pipeline and
+# it can't continue. The job of this script is to be patient and try the
+# download multiple times before crashing the whole pipeline.
+#
+# LOCK FILE: Since there is ultimately only one network port to the outside
+# world, downloading is done much faster in serial, not in parallel. But
+# the pipeline's processing may be done in parallel (with multiple threads
+# needing to download different files at the same time). Therefore, this
+# script uses the `flock' program to only do one download at a time. To
+# benefit from it, any call to this script must be given the same lock
+# file.
+#
+# Original author:
+#     Mohammad Akhlaghi <mohammad@akhlaghi.org>
+# Contributing author(s):
+# Copyright (C) 2019, Mohammad Akhlaghi.
+#
+# This script is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This script is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details. See <http://www.gnu.org/licenses/>.
+
+
+
+
+
+# Script settings
+# ---------------
+# Stop the script if there are any errors.
+set -e
+
+
+
+
+
+# Input arguments and necessary sanity checks.
+inurl="$3"
+outname="$4"
+lockfile="$2"
+downloader="$1"
+if [ "x$downloader" = x ]; then
+    echo "$0: downloader (first argument) not given."; exit 1;
+fi
+if [ "x$lockfile" = x ]; then
+    echo "$0: lock file (second argument) not given."; exit 1;
+fi
+if [ "x$inurl" = x ]; then
+    echo "$0: full input URL (third argument) not given."; exit 1;
+fi
+if [ "x$outname" = x ]; then
+    echo "$0: output name (fourth argument) not given."; exit 1;
+fi
+
+
+
+
+
+# Try downloading multiple times before crashing
+counter=0
+maxcounter=10
+while [ ! -f "$outname" ]; do
+
+    # Increment the counter. We need the `counter=' part here because
+    # without it the evaluation of arithmetic expression will be like an
+    # error and the script is set to crash on errors.
+    counter=$((counter+1))
+
+    # If we have passed a maximum number of trials, just exit with
+    # a failed code.
+    if (( counter > maxcounter )); then
+        echo
+        echo "Failed $maxcounter download attempts: $outname"
+        echo
+        exit 1
+    fi
+
+    # If this isn't the first attempt print a notice and wait a little for
+    # the next trial.
+    if (( counter > 1 )); then
+        tstep=$((counter*5))
+        echo "Download trial $counter for '$outname' in $tstep seconds."
+        sleep $tstep
+    fi
+
+    # Attempt the download (one-at-a-time). Arguments are passed directly,
+    # not through `bash -c', so URLs containing `&' or spaces are safe.
+    flock "$lockfile" $downloader -O"$outname" "$inurl" || rm -f "$outname"
+done
+
+
+
+
+
+# Return successfully
+exit 0
diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk
index f83ad6f..1fc51e6 100644
--- a/reproduce/src/make/download.mk
+++ b/reproduce/src/make/download.mk
@@ -52,6 +52,7 @@
 # process with a file and make sure that only one downloading event is in
 # progress at every moment.
 $(indir):; mkdir $@
+downloadwrapper = $(srcdir)/bash/download-multi-try.sh
 inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits)
 $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
 
@@ -68,8 +69,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
 	  ln -s $(INDIR)/$$origname $@
 	else
 	  touch $(lockdir)/download
-	  flock $(lockdir)/download bash -c \
-	        "if ! wget -O$@ $$url/$$origname; then rm -f $@; exit 1; fi"
+	  $(downloadwrapper) wget $(lockdir)/download $$url/$$origname $@
 	fi
 
 	# Check the md5 sum to see if this is the proper dataset.