#!/bin/bash
#
# Attempt downloading multiple times before crashing whole project. From
# the top project directory (for the shebang above), this script must be
# run like this:
#
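#   $ /path/to/download-multi-try downloader lockfile input-url \
#                                 downloaded-name [backup-servers]
#
# NOTE: The `downloader' must end with the option that specifies the output
# name, for example "wget -O". Any other options can be placed before it.
#
# The optional fifth argument gives backup server(s) from which the same
# file name can be requested when downloading `input-url' fails.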
#
# Due to temporary network problems, a download may fail suddenly, but
# succeed in a second try a few seconds later. Without this script that
# temporary glitch in the network will permanently crash the project and
# it can't continue. The job of this script is to be patient and try the
# download multiple times before crashing the whole project.
#
# LOCK FILE: Since there is usually only one network port to the outside
# world, downloading is done much faster in serial, not in parallel. But
# the project's processing may be done in parallel (with multiple threads
# needing to download different files at the same time). Therefore, this
# script uses the `flock' program to only do one download at a time. To
# benefit from it, any call to this script must be given the same lock
# file. If your system has multiple ports to the internet, or for any
# reason, you don't want to use a lock file, set the `lockfile' name to
# `nolock'.
#
# Copyright (C) 2019-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.





# Script settings
# ---------------
# Abort the whole script as soon as any command returns a failure status.
set -o errexit





# Input arguments and necessary sanity checks.
#
#   $1: downloader program with its options, ending in the option that
#       takes the output name (for example "wget -O").
#   $2: lock file name (or `nolock').
#   $3: full URL of the file to download.
#   $4: name of the downloaded file.
#   $5: backup servers (not mandatory, so it isn't checked below).
#
# The first four arguments are mandatory: when any of them is empty, an
# error message is printed on standard error (so it isn't mixed with the
# normal output) and the script exits with a failure status.
downloader="$1"
lockfile="$2"
inurl="$3"
outname="$4"
backupservers="$5"
if [ "x$downloader" = x ]; then
    echo "$0: downloader (first argument) not given." >&2
    exit 1
fi
if [ "x$lockfile" = x ]; then
    echo "$0: lock file (second argument) not given." >&2
    exit 1
fi
if [ "x$inurl" = x ]; then
    echo "$0: full input URL (third argument) not given." >&2
    exit 1
fi
if [ "x$outname" = x ]; then
    echo "$0: output name (fourth argument) not given." >&2
    exit 1
fi





# Separate the actual filename (everything after the last `/' of the input
# URL), to possibly use a backup server. The shell's own parameter
# expansion is used: it needs no extra processes and the URL is never
# handed to `echo' (which could misread it as one of its options). When
# the URL has no `/', the whole URL is used; when it ends in `/', the
# result is empty.
urlfile="${inurl##*/}"





# Make a single download attempt of one URL (the only argument) into
# `outname'. When `lockfile' is `nolock' the downloader is run directly;
# otherwise it is run under `flock' on that lock file, so only one
# download is done at a time. If the downloader fails, any output file it
# may have left behind is deleted: the existence of `outname' afterwards
# is therefore the sign of a successful download.
#
# The `downloader' string is deliberately expanded without quotes (or
# pasted into the command string given to the locked shell) because it
# contains the program name and its options, ending with the option that
# takes the output name (for example "wget -O"). The output name and the
# URL are passed as separate, quoted words, so spaces or shell
# meta-characters in them (for example a `&' in a URL) are not
# interpreted by any shell.
download_attempt() {
    local url="$1"
    if [ x"$lockfile" = xnolock ]; then
        if ! $downloader "$outname" "$url"; then rm -f -- "$outname"; fi
    else
        # In the locked shell `$1' is the output name and `$2' is the URL.
        flock "$lockfile" bash -c \
              "if ! $downloader \"\$1\" \"\$2\"; then rm -f -- \"\$1\"; fi" \
              download-multi-try "$outname" "$url"
    fi
}





# Split the (optional) backup servers into one array element per server.
# They are separated by white space (spaces, tabs or new-lines). With
# `-d ""' the `read' builtin parses the whole string, but it then always
# returns a non-zero status at the end of its input, hence the `|| true'
# (the script is set to crash on errors).
backuplist=()
read -r -d '' -a backuplist <<< "$backupservers" || true





# Try downloading multiple times before crashing.
counter=0
maxcounter=10
while [ ! -f "$outname" ]; do

    # Increment the counter. A plain assignment of `$((...))' is used
    # because a bare arithmetic command like `((counter++))' returns a
    # non-zero status when its value is zero; that would look like an
    # error and the script is set to crash on errors.
    counter=$((counter+1))

    # If we have passed the maximum number of trials, report it (on
    # standard error) and exit with a failed code.
    if (( counter > maxcounter )); then
        {
            echo
            echo "Failed $maxcounter download attempts: $outname"
            echo
        } >&2
        exit 1
    fi

    # If this isn't the first attempt, print a notice and wait a little
    # for the next trial (the delay grows with every trial).
    if (( counter > 1 )); then
        tstep=$((counter*5))
        echo "Download trial $counter for '$outname' in $tstep seconds."
        sleep $tstep
    fi

    # Try downloading from the requested URL.
    download_attempt "$inurl"

    # If it failed, try the backup server(s) one after the other (with or
    # without a lock file), requesting the same file name from each.
    if [ ! -f "$outname" ]; then
        for bs in "${backuplist[@]}"; do

            # Use this backup server (a trailing `/' in its name is
            # dropped to avoid a double `/' in the URL).
            download_attempt "${bs%/}/$urlfile"

            # If the file was downloaded, stop parsing the backup
            # servers: a failure on a later server would otherwise
            # delete the file that was just downloaded.
            if [ -f "$outname" ]; then break; fi
        done
    fi

done





# Return successfully
exit 0