-
Notifications
You must be signed in to change notification settings - Fork 6
/
social-media-extract-profile-link
executable file
·77 lines (71 loc) · 2.23 KB
/
social-media-extract-profile-link
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
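# Given social media profile links as arguments and/or on stdin (one per line),
# this prints the external link found in each profile's description, if any.
# Recognised sites: Facebook, Instagram, Twitter and YouTube.
# Pass -v or --verbose before the links to log each fetch to stderr.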
# Echo the given arguments, but only when the global `verbose` flag is
# non-empty; otherwise print nothing and succeed.
# Globals:   verbose (read)
# Arguments: anything accepted by `echo`
# Outputs:   the arguments on stdout when verbose is set
verbose_echo() {
  [[ -n "${verbose}" ]] || return 0
  echo "$@"
}
# Download a URL with curl and write the response body to stdout.
# Arguments: $1 - URL to retrieve
# Outputs:   response body on stdout; a "Fetching <url>" note on stderr
#            (the note is produced through verbose_echo)
fetch() {
  # Silent, follow redirects, give up after 10 seconds, browser-like agent.
  local -a curl_opts=(
    -sL
    --max-time 10
    -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'
  )
  verbose_echo "Fetching $1" >&2
  curl "${curl_opts[@]}" "$1"
}
# Fetch one social media profile page and print the external link(s)
# advertised on it.
#
# The site is chosen by substring match on the URL (facebook.com/,
# instagram.com/, twitter.com/, youtube.com/); any other URL is ignored and
# nothing is fetched. Extraction is done with PCRE patterns (grep -P) against
# the raw page, so it depends on the markup each site served when the
# patterns were written.
#
# Arguments: $1 - profile URL
# Outputs:   extracted link(s) on stdout, one per line; nothing when the site
#            is not recognised or no link is found
# Requires:  fetch, grep with -P support, awk, and python3 (Facebook/YouTube)
fetch_n_extract() {
  # Function-scoped so a call never overwrites the caller's variables.
  local url="$1"
  local page target

  case "${url}" in
    *'facebook.com/'*)
      page="$(fetch "${url}")"
      if grep -qF '"tab_home"' <<<"${page}"; then
        # Publicly accessible profile: the values are JSON string literals,
        # so decode them with json.loads, then drop repeats keeping order.
        grep -Po '"website_url":\K"[^"]+"' <<<"${page}" \
          | python3 -c 'import json, sys'$'\n''for line in sys.stdin:'$'\n'' print(json.loads(line))' \
          | awk '!seen[$0]++'
      elif grep -qF 'id="pagelet_loggedout_sign_up"' <<<"${page}"; then
        # Profile overview only: links go through the l.facebook.com
        # redirector, so take its "u" parameter and URL-decode it.
        grep -Po '<div\s([^<]*\s)?id\s*=\s*"pagelet_contact".*<div\s([^<]*\s)?id\s*=\s*"bottomContent"' <<<"${page}" \
          | grep -Po 'href="https://l\.facebook\.com/l\.php\?u=\K[^&]+' \
          | python3 -c 'import sys, urllib.parse; sys.stdout.write(urllib.parse.unquote(sys.stdin.read()))'
      fi
      ;;
    *'instagram.com/'*)
      fetch "${url}" | grep -Po '"external_url":"\K[^"]+'
      sleep 3 # To avoid getting banned
      ;;
    *'twitter.com/'*)
      # Join the page into one line so the header card can be matched as a
      # whole, then read the title attribute of its u-textUserColor anchor.
      fetch "${url}" \
        | tr -d '\n' \
        | grep -Po '<div\s+([^>]*\s)?class\s*=\s*"([^"]*\s)?ProfileHeaderCard-url(\s[^"]*)?">.*?</div>' \
        | grep -Po '<a\s(?=([^>]*\s)?class\s*=\s*"([^"]*\s)?u-textUserColor(\s[^"]*)?")([^>]*\s)?title="\K[^"]+'
      ;;
    *'youtube.com/'*)
      # Append disable_polymer=1 with the right separator for this URL.
      if [[ "${url}" == *'?'* ]]; then
        target="${url}&disable_polymer=1"
      else
        target="${url}?disable_polymer=1"
      fi
      # Header links go through /redirect; take its "q" parameter and
      # URL-decode it.
      fetch "${target}" \
        | tr -d '\n' \
        | grep -Po '<div\s([^>]*\s)?id\s*=\s*"header-links".*?</div>' \
        | grep -Po 'href="/redirect\?([^"]*&(amp;)?)?q=\K[^&"]+' \
        | python3 -c 'import sys, urllib.parse; sys.stdout.write(urllib.parse.unquote(sys.stdin.read()))'
      ;;
  esac
}
# Scan the leading command-line options.
#
# Recognised options are -v / --verbose. A literal "--" marks the end of the
# options: it is counted as consumed and everything after it is left alone,
# so it is treated as a link even if it looks like an option. The first
# argument that is not an option also ends the scan.
#
# Globals:   verbose (written: 1 if requested, empty otherwise)
#            leading_option_count (written: how many leading arguments were
#            consumed as options, including a terminating "--")
# Arguments: the script's arguments
_parse_leading_options() {
  verbose=
  leading_option_count=0
  while (( $# > 0 )); do
    case "$1" in
      -v | --verbose)
        verbose=1
        ;;
      --)
        # Consume the marker itself, then stop looking for options.
        leading_option_count=$((leading_option_count + 1))
        break
        ;;
      *)
        # Assume end of options
        break
        ;;
    esac
    leading_option_count=$((leading_option_count + 1))
    shift
  done
}

# A function cannot shift the script's own positional parameters, so drop the
# consumed options here; what remains in "$@" is the list of links.
_parse_leading_options "$@"
shift "${leading_option_count}"
# Build the list of links (remaining arguments first, then stdin) and run the
# extractor on each one in order.
{
  # printf rather than echo, so an argument such as "-n" or "-e" is passed
  # through as data instead of being taken as an echo option.
  for arg in "$@"; do
    printf '%s\n' "${arg}"
  done
  # Only read stdin when it is not a terminal, so running the script with
  # just arguments from an interactive shell does not block waiting for input.
  if [[ ! -t 0 ]]; then
    cat
  fi
} | while read -r url || [[ -n "${url}" ]]; do
  # The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  # Lines may start with "* "; drop that two-character prefix.
  if [[ "${url}" == '* '* ]]; then
    url="${url:2}"
  fi
  # Detach stdin so nothing run for this link can consume the links that are
  # still queued on the loop's input.
  fetch_n_extract "${url}" </dev/null
done