From 1c508e636b90ae170213ccf71771711156dd8f52 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Wed, 6 Feb 2019 18:08:19 +0000 Subject: Wrapper script for multiple attempts at downloading inputs Until now, downloading was treated similarly to any other operation in the Makefile: if it crashes, the pipeline would crash. But network errors aren't like processing errors: attempting to download a second time will probably not crash (network relays are very complex and not reproducible and packets get lost all the time)! This is usually not felt in downloading one or two files, but when downloading many thousands of files, it will happen every once in a while and it's a real waste of time until you check to just press enter again! With this commit we have the `reproduce/src/bash/download-multi-try.sh' script in the pipeline which will repeat the download several times (with increasing time intervals) before crashing and thus fix the problem. --- reproduce/src/bash/download-multi-try.sh | 108 +++++++++++++++++++++++++++++++ reproduce/src/make/download.mk | 4 +- 2 files changed, 110 insertions(+), 2 deletions(-) create mode 100755 reproduce/src/bash/download-multi-try.sh (limited to 'reproduce') diff --git a/reproduce/src/bash/download-multi-try.sh b/reproduce/src/bash/download-multi-try.sh new file mode 100755 index 0000000..31490c9 --- /dev/null +++ b/reproduce/src/bash/download-multi-try.sh @@ -0,0 +1,108 @@ +#!.local/bin/bash +# +# Try downloading multiple times before crashing the whole pipeline +# +# $ ./download-multi-try.sh downloader lockfile input-url downloaded-name +# +# Due to temporary network problems, a download may fail suddenly, but +# succeed in a second try a few seconds later. Without this script that +# temporary glitch in the network will permanently crash the pipeline and +# it can't continue. The job of this script is to be patient and try the +# download multiple times before crashing the whole pipeline. 
+# +# LOCK FILE: Since there is ultimately only one network port to the outside +# world, downloading is done much faster in serial, not in parallel. But +# the pipeline's processing may be done in parallel (with multiple threads +# needing to download different files at the same time). Therefore, this +# script uses the `flock' program to only do one download at a time. To +# benefit from it, any call to this script must be given the same lock +# file. +# +# Original author: +# Mohammad Akhlaghi +# Contributing author(s): +# Copyright (C) 2019, Mohammad Akhlaghi. +# +# This script is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This script is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + + +# Script settings +# --------------- +# Stop the script if there are any errors. +set -e + + + + + +# Input arguments and necessary sanity checks. +inurl="$3" +outname="$4" +lockfile="$2" +downloader="$1" +if [ "x$downloader" = x ]; then + echo "$0: downloader (first argument) not given."; exit 1; +fi +if [ "x$lockfile" = x ]; then + echo "$0: lock file (second argument) not given."; exit 1; +fi +if [ "x$inurl" = x ]; then + echo "$0: full input URL (third argument) not given."; exit 1; +fi +if [ "x$outname" = x ]; then + echo "$0: output name (fourth argument) not given."; exit 1; +fi + + + + + +# Try downloading multiple times before crashing +counter=0 +maxcounter=10 +while [ ! -f "$outname" ]; do + + # Increment the counter. We need the `counter=' part here because + # without it the evaluation of arithmetic expression will be like an + # error and the script is set to crash on errors. 
+ counter=$((counter+1)) + + # If we have passed a maximum number of trials, just exit with + # a failure code. + if (( counter > maxcounter )); then + echo + echo "Failed $maxcounter download attempts: $outname" + echo + exit 1 + fi + + # If this isn't the first attempt print a notice and wait a little for + # the next trial. + if (( counter > 1 )); then + tstep=$((counter*5)) + echo "Download trial $counter for '$outname' in $tstep seconds." + sleep $tstep + fi + + # Attempt downloading the file (one-at-a-time). + flock "$lockfile" bash -c \ + "if ! $downloader -O$outname $inurl; then rm -f $outname; fi" +done + + + + + +# Return successfully +exit 0 diff --git a/reproduce/src/make/download.mk b/reproduce/src/make/download.mk index f83ad6f..1fc51e6 100644 --- a/reproduce/src/make/download.mk +++ b/reproduce/src/make/download.mk @@ -52,6 +52,7 @@ # process with a file and make sure that only one downloading event is in # progress at every moment. $(indir):; mkdir $@ +downloadwrapper = $(srcdir)/bash/download-multi-try.sh inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits) $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) @@ -68,8 +69,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) ln -s $(INDIR)/$$origname $@ else touch $(lockdir)/download - flock $(lockdir)/download bash -c \ - "if ! wget -O$@ $$url/$$origname; then rm -f $@; exit 1; fi" + $(downloadwrapper) wget $(lockdir)/download $$url/$$origname $@ fi # Check the md5 sum to see if this is the proper dataset. -- cgit v1.2.1