Compare commits

...

3 Commits

Author SHA1 Message Date
r4 5ac4fa9d99 better, more reliable scraping 2022-06-23 22:31:11 +02:00
r4 81d0322406 allow updating index without specifying episode(s) 2022-06-23 20:42:14 +02:00
r4 e631ad6009 allow symlinks to executable 2022-06-23 20:35:29 +02:00
2 changed files with 22 additions and 15 deletions

View File

@ -2,7 +2,8 @@
# South Park Downloader Configuration File #
############################################
# Relative or absolute paths are allowed. Please DON'T use ~/, but rather $HOME or /home/$USER.
# Absolute paths or paths relative to the executable are allowed.
# Please DON'T use ~/ (it is not expanded inside the quoted values below); use $HOME or /home/$USER instead.
# youtube-dl (or youtube-dlc) executable path
YOUTUBE_DL="./yt-dlc/youtube-dlc"

View File

@ -1,14 +1,17 @@
#!/usr/bin/env bash
# Load the configuration that sits next to the script path as invoked ($0).
source "$(dirname "$0")/config.sh"
# Resolve executable directory
# (readlink is applied to $0 first, so DIR is derived from the resolved path
# rather than from the path the script was invoked through.)
DIR="$(dirname "$(readlink -e "$0")")"
# Turn paths into absolute ones, if they aren't already. Will be necessary, since we'll change directories later.
source "$DIR/config.sh"
# Turn paths into absolute ones if they aren't already. Will be necessary since we'll change directories later.
# A path counts as absolute when its first character is "/".
# NOTE(review): as listed, each `[ ... ] &&` guard below covers only the
# readlink line directly after it; the following "$DIR/..." assignment then
# runs unconditionally and prefixes $DIR a second time. This looks like the
# pre-change and post-change lines of the comparison shown back to back —
# confirm which of the two assignments is meant to remain under the guard.
[ ! "${CACHEDIR::1}" = "/" ] &&
CACHEDIR="$(readlink -f "$(dirname "$0")/$CACHEDIR")"
CACHEDIR="$DIR/$CACHEDIR"
[ ! "${OUTDIR::1}" = "/" ] &&
OUTDIR="$(readlink -f "$(dirname "$0")/$OUTDIR")"
OUTDIR="$DIR/$OUTDIR"
[ ! "${YOUTUBE_DL::1}" = "/" ] &&
YOUTUBE_DL="$(readlink -f "$(dirname "$0")/$YOUTUBE_DL")"
YOUTUBE_DL="$DIR/$YOUTUBE_DL"
# Create the output and cache directories when they do not exist yet.
[ ! -e "$OUTDIR" ] && mkdir -p "$OUTDIR"
[ ! -e "$CACHEDIR" ] && mkdir -p "$CACHEDIR"
@ -37,7 +40,7 @@ usage() {
echo " -d - Dry run: don't download, just print out URLs"
}
unset OPT_SEASON OPT_EPISODE OPT_ALL OPT_EN OPT_LANG OPT_PROGRESS OPT_UPDATE_INDEX OPT_DRY
unset OPT_SEASON OPT_EPISODE OPT_ALL OPT_EN OPT_LANG OPT_PROGRESS OPT_UPDATE_INDEX OPT_UPDATE_INDEX_EXPLICIT OPT_DRY
OPT_LANG="EN"
OPT_PROGRESS=true
OPT_UPDATE_INDEX=true
@ -72,6 +75,7 @@ while getopts "pPEDuUdas:e:h" arg; do
;;
u)
OPT_UPDATE_INDEX=true
OPT_UPDATE_INDEX_EXPLICIT=true
;;
U)
unset OPT_UPDATE_INDEX
@ -89,26 +93,26 @@ done
# Pick, per language, the index cache file, the first page to crawl and the
# pattern that recognises episode links (a site-relative path).
# NOTE(review): REGEX_EPISODE_URL is assigned twice in each branch and the
# later assignment wins. The first form wraps the path pattern in literal
# double quotes; the second matches the bare path only — confirm that just
# one of them is meant to remain.
# If OPT_LANG is neither "DE" nor "EN", none of these variables is set here.
if [ "$OPT_LANG" = "DE" ]; then
INDEX_FILENAME="$CACHEDIR/_episode_index_DE_"
INDEX_INITIAL_URL="https://www.southpark.de/folgen/940f8z/south-park-cartman-und-die-analsonde-staffel-1-ep-1"
REGEX_EPISODE_URL="\"/folgen/[0-9a-z]\+/south-park-[0-9a-z-]\+-staffel-[0-9]\+-ep-[0-9]\+\""
REGEX_EPISODE_URL="/folgen/[0-9a-z]\\+/south-park-[0-9a-z-]\\+-staffel-[0-9]\\+-ep-[0-9]\\+"
elif [ "$OPT_LANG" = "EN" ]; then
INDEX_FILENAME="$CACHEDIR/_episode_index_EN_"
INDEX_INITIAL_URL="https://www.southpark.de/en/episodes/940f8z/south-park-cartman-gets-an-anal-probe-season-1-ep-1"
REGEX_EPISODE_URL="\"/en/episodes/[0-9a-z]\+/south-park-[0-9a-z-]\+-season-[0-9]\+-ep-[0-9]\+\""
REGEX_EPISODE_URL="/en/episodes/[0-9a-z]\\+/south-park-[0-9a-z-]\\+-season-[0-9]\\+-ep-[0-9]\\+"
fi
#######################################
# Extend the cached episode index by crawling episode pages.
#
# Starting from the last URL stored in the index file, the page behind that
# URL is fetched and every episode link listed AFTER the seed URL on that page
# is appended to the index. This repeats with the new last URL until a page
# yields nothing new. Finally the index is rewritten without duplicate lines.
#
# Globals:
#   INDEX_FILENAME     (read)  path of the index file; created if missing
#   INDEX_INITIAL_URL  (read)  URL used to seed a freshly created index
#   REGEX_EPISODE_URL  (read)  sed basic regex matching a site-relative
#                              episode path inside an href="..." attribute
# Arguments: none
# Outputs:   progress text on stdout; rewrites $INDEX_FILENAME
#######################################
update_index() {
  local SEEDURL HTML URLS NEWURLS NEW_INDEX

  if [ ! -e "$INDEX_FILENAME" ]; then
    echo "$INDEX_INITIAL_URL" > "$INDEX_FILENAME"
  fi
  echo -ne "\e[32m>>> Updating episode index\e[m"

  while true; do
    SEEDURL="$(tail -n1 "$INDEX_FILENAME" | tr -d '\n')"
    # Best effort: a failed download leaves HTML empty, which ends the loop.
    HTML="$(curl -s "$SEEDURL")"
    # Put every anchor on its own line, keep only hrefs that look like an
    # episode path, and turn them into absolute URLs.
    URLS="$(printf '%s' "$HTML" \
      | sed 's@</a>@\n@g' \
      | sed -n "s@.*href=\"\\($REGEX_EPISODE_URL\\)\".*@\\1@p" \
      | sed "s@^@https://www.southpark.de@g")"
    # Keep only the URLs listed after the seed URL. The seed is compared as a
    # literal string so that characters such as "." are not treated as regex
    # metacharacters.
    NEWURLS="$(printf '%s\n' "$URLS" \
      | awk -v seed="$SEEDURL" 'seen { print } $0 == seed { seen = 1 }')"
    [ -z "$NEWURLS" ] && break
    echo "$NEWURLS" >> "$INDEX_FILENAME"
    echo -ne "\e[32m.\e[m"
  done

  # The awk command removes duplicate lines (first occurrence wins).
  NEW_INDEX="$(awk '!x[$0]++' "$INDEX_FILENAME")"
  # Write the data as an argument, never as the printf format, and end the
  # file with a newline: without it the next run's append would be glued onto
  # the last stored URL and corrupt the index.
  if [ -n "$NEW_INDEX" ]; then
    printf '%s\n' "$NEW_INDEX" > "$INDEX_FILENAME"
  fi
  echo
}
@ -233,6 +237,8 @@ elif [ -n "$OPT_ALL" ]; then
[ -n "$OPT_UPDATE_INDEX" ] && update_index
p_info "Going to download ALL episodes"
download_all
elif [ -n "$OPT_UPDATE_INDEX_EXPLICIT" ] && [ -n "$OPT_UPDATE_INDEX" ]; then
update_index
else
usage
exit 1