aboutsummaryrefslogtreecommitdiff
path: root/reproduce/analysis/bash/download-multi-try
blob: d7e9be2da85f3c6471909fc0f876f82aaf1bc694 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/bin/sh
#
# Attempt downloading multiple times before crashing whole project. From
# the top project directory (for the shebang above), this script must be
# run like this:
#
#   $ /path/to/download-multi-try downloader lockfile input-url downloaded-name
#
# NOTE: The 'downloader' must contain the option to specify the output name
# in its end. For example "wget -O". Any other option can also be placed in
# the middle.
#
# Due to temporary network problems, a download may fail suddenly, but
# succeed in a second try a few seconds later. Without this script that
# temporary glitch in the network will permanently crash the project and
# it can't continue. The job of this script is to be patient and try the
# download multiple times before crashing the whole project.
#
# LOCK FILE: Since there is usually only one network port to the outside
# world, downloading is done much faster in serial, not in parallel. But
# the project's processing may be done in parallel (with multiple threads
# needing to download different files at the same time). Therefore, this
# script uses the 'flock' program to only do one download at a time. To
# benefit from it, any call to this script must be given the same lock
# file. If your system has multiple ports to the internet, or for any
# reason, you don't want to use a lock file, set the 'lockfile' name to
# 'nolock'.
#
# Copyright (C) 2019-2023 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.





# Script settings
# ---------------
# Stop the script if there are any errors.
set -e





# Input arguments and necessary sanity checks. Note that the 5th argument
# (backup servers) isn't mandatory.
inurl="$3"
outname="$4"
lockfile="$2"
downloader="$1"
backupservers="$5"
if [ "x$downloader" = x ]; then
    echo "$0: downloader (first argument) not given."; exit 1;
fi
if [ "x$lockfile" = x ]; then
    echo "$0: lock file (second argument) not given."; exit 1;
fi
if [ "x$inurl" = x ]; then
    echo "$0: full input URL (third argument) not given."; exit 1;
fi
if [ "x$outname" = x ]; then
    echo "$0: output name (fourth argument) not given."; exit 1;
fi





# Separate the actual filename, to possibly use backup server.
urlfile=$(echo "$inurl" | awk -F "/" '{print $NF}')





# Try downloading multiple times before crashing.
counter=0
maxcounter=10
while [ ! -f "$outname" ]; do

    # Increment the counter.
    counter=$(echo $counter | awk '{print $1+1}')

    # If we have passed a maximum number of trials, just exit with
    # a failed code.
    reachedmax=$(echo $counter \
         | awk '{if($1>'$maxcounter') print "yes"; else print "no";}')
    if [ x$reachedmax = xyes ]; then
        echo ""
	echo "Failed $maxcounter download attempts: $outname"
        echo ""
	exit 1
    fi

    # If this isn't the first attempt print a notice and wait a little for
    # the next trail.
    if [ x$counter = x1 ]; then
        just_a_place_holder=1
    else
        tstep=$(echo $counter | awk '{print $1*5}')
        echo "Download trial $counter for '$outname' in $tstep seconds."
        sleep $tstep
    fi

    # Attempt downloading the file. Note that the 'downloader' ends with
    # the respective option to specify the output name. For example "wget
    # -O" (so 'outname', that comes after it) will be the name of the
    # downloaded file.
    if [ x"$lockfile" = xnolock ]; then
        if ! $downloader $outname $inurl; then rm -f $outname; fi
    else
        # Try downloading from the requested URL.
        flock "$lockfile" sh -c \
              "if ! $downloader $outname \"$inurl\"; then rm -f $outname; fi"
    fi

    # If the download failed, try the backup server(s).
    if [ ! -f "$outname" ]; then
        if [ x"$backupservers" != x ]; then
            for bs in $backupservers; do

                # Use this backup server.
                if [ x"$lockfile" = xnolock ]; then
                    if ! $downloader $outname $bs/$urlfile; then rm -f $outname; fi
                else
                    flock "$lockfile" sh -c \
                          "if ! $downloader $outname $bs/$urlfile; then rm -f $outname; fi"
                fi

                # If the file was downloaded, break out of the loop that
                # parses over the backup servers.
                if [ -f "$outname" ]; then break; fi
            done
        fi
    fi
done





# Return successfully
exit 0