Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better download mechanism with aria2c and code beautified #698

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 39 additions & 31 deletions src/bin/download_clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
# You should have received a copy of the GNU Affero General Public License
# along with Overpass_API. If not, see <https://www.gnu.org/licenses/>.

# Parent of the directory the script was started from.
EXEC_DIR="$(pwd)/../"

# Strict mode: stop on failing commands, unset variables and failing
# pipeline stages.
set -euo pipefail

# First positional argument. The empty default keeps `set -u` from aborting
# on this line when the script is started without arguments.
CLONE_DIR="${1:-}"
REMOTE_DIR=
SOURCE=
DONE=
META=

# Scratch file that holds the list of URLs handed to aria2c. mktemp gives
# every run its own unpredictable name, so concurrent runs cannot clash and
# nobody can pre-create the path in the shared temp directory. The EXIT trap
# removes the file on every exit path.
TEMP_FILE="$(mktemp "${TMPDIR:-/tmp}/overpass_files_list.XXXXXX")"
trap 'rm -f -- "${TEMP_FILE}"' EXIT

if [[ -z $1 ]]; then
{
Expand Down Expand Up @@ -52,9 +53,9 @@ process_param()
fi
};

# Pass each of the first three command-line arguments to process_param,
# skipping arguments that are absent or empty. The `:-` default keeps
# `set -u` from aborting when fewer than three arguments were given.
if [[ -n ${1:-} ]]; then process_param "${1}"; fi
if [[ -n ${2:-} ]]; then process_param "${2}"; fi
if [[ -n ${3:-} ]]; then process_param "${3}"; fi

FILES_BASE="\
nodes.bin nodes.map node_tags_local.bin node_tags_global.bin node_frequent_tags.bin node_keys.bin \
Expand All @@ -79,61 +80,68 @@ relation_changelog.bin relation_tags_local_attic.bin relation_tags_global_attic.
# $2 - local destination
fetch_file()
{
  # One wget call per request: ${1} is the URL, ${2} the output path.
  # -c lets wget continue a partially downloaded file instead of restarting.
  wget -c -O "${2}" "${1}"
}

# Fetch ${1} into ${2}, retrying every 15 seconds until the local file exists
# and is non-empty. Gives up after 24 hours (86400 s), prints a message and
# exits the whole script with status 1.
#   $1 - remote source URL
#   $2 - local destination
retry_fetch_file()
{
  local deadline
  deadline=$(($(date '+%s') + 86400))

  # Start from scratch so a stale file cannot satisfy the -s check below.
  rm -f "${2}"

  # `|| true`: a failed transfer must fall through to the retry loop rather
  # than terminate the script under `set -e`. Success is judged solely by
  # the destination file being non-empty.
  fetch_file "${1}" "${2}" || true
  until [[ -s "${2}" ]]; do
    if [[ $(date '+%s') -ge ${deadline} ]]; then
      echo "File ${1} unavailable. Aborting."
      exit 1
    fi
    sleep 15
    fetch_file "${1}" "${2}" || true
  done
}

# Sequentially fetch one database file and its .idx companion with
# retry_fetch_file, announcing each on stdout.
#   $1 - file name, appended to both REMOTE_DIR and CLONE_DIR
download_file()
{
  echo
  echo "Fetching ${1}"
  retry_fetch_file "${REMOTE_DIR}/${1}" "${CLONE_DIR}/${1}"
  echo "Fetching ${1}.idx"
  retry_fetch_file "${REMOTE_DIR}/${1}.idx" "${CLONE_DIR}/${1}.idx"
}

# Download every URL listed in TEMP_FILE into CLONE_DIR with aria2c.
#   -d DIR   directory the files are stored in
#   -j 16    run up to 16 downloads concurrently
#   -x 16    open up to 16 connections per server
#   -R       apply the remote file's timestamp to the local file
#   -i FILE  read the URLs to download from FILE
parallel_download()
{
  aria2c -d "${CLONE_DIR}" -j 16 -R -x 16 -i "${TEMP_FILE}"
}

mkdir -p "${CLONE_DIR}"

# The response to the trigger_clone request is stored as base-url; its
# content becomes the remote directory all further files are fetched from.
fetch_file "${SOURCE}/trigger_clone" "${CLONE_DIR}/base-url"

REMOTE_DIR=$(<"${CLONE_DIR}/base-url")
#echo "Triggered generation of a recent clone"
#sleep 30

retry_fetch_file "${REMOTE_DIR}/replicate_id" "${CLONE_DIR}/replicate_id"

# Queue every base file together with its .idx companion, then download the
# whole batch in one aria2c run. Truncating the list first guarantees it
# holds only this batch.
: > "${TEMP_FILE}"
# FILES_BASE is a whitespace-separated list of names, so it is expanded
# unquoted on purpose.
for I in ${FILES_BASE}; do
  printf '%s\n' "${REMOTE_DIR}/${I}" "${REMOTE_DIR}/${I}.idx" >> "${TEMP_FILE}"
done
parallel_download

# Empty the URL list so files queued earlier are not downloaded a second
# time, then fetch the meta files when META is "yes" or "attic".
: > "${TEMP_FILE}"
if [[ ${META} == "yes" || ${META} == "attic" ]]; then
  # FILES_META is expanded unquoted on purpose: one word per file name.
  for I in ${FILES_META}; do
    printf '%s\n' "${REMOTE_DIR}/${I}" "${REMOTE_DIR}/${I}.idx" >> "${TEMP_FILE}"
  done
  parallel_download
fi

# Empty the URL list so files queued earlier are not downloaded a second
# time, then fetch the attic files when META is "attic".
: > "${TEMP_FILE}"
if [[ ${META} == "attic" ]]; then
  # FILES_ATTIC is expanded unquoted on purpose: one word per file name.
  for I in ${FILES_ATTIC}; do
    printf '%s\n' "${REMOTE_DIR}/${I}" "${REMOTE_DIR}/${I}.idx" >> "${TEMP_FILE}"
  done
  parallel_download
fi
# All batches are done; remove the URL list.
rm -f "${TEMP_FILE}"

echo " database ready."