#!/usr/bin/env python3
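"""Scrape premium file-host links from a Mobilism forum topic and mirror them.

For each supported link in the topic, the script drives the mblservices.org
premium downloader, gzips the payload, and uploads it to the "staging" rclone
remote, de-duplicating by SHA-1. Credentials (rclone config and login cookies)
come from the PROJECT_SECRET environment variable; the topic URL comes from
TASK_RECIPE.
"""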
import gzip
import json
import os
import pickle
import re
import subprocess
from base64 import b64decode
from hashlib import sha1
from sys import stderr
from urllib.parse import urlparse, unquote

import requests
from bs4 import BeautifulSoup
from requests.cookies import RequestsCookieJar


def decode_cookiejar(data):
    """Rebuild a RequestsCookieJar from its base64-encoded pickle."""
    cj = RequestsCookieJar()
    cj._cookies = pickle.loads(b64decode(data))
    return cj


# The cookie jar stored in PROJECT_SECRET was generated roughly like this:
# import browser_cookie3
# cj = cookiejar_filter(browser_cookie3.firefox(), "forum.mobilism.org|mblservices.org")
# with open("cookies.txt", "w") as f:
#     f.write(encode_cookiejar(cj))
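
# Neither encode_cookiejar nor cookiejar_filter is defined in this file; the
# sketches below are assumptions inferred from decode_cookiejar above, not the
# original implementations.
from base64 import b64encode

def encode_cookiejar(cj):
    # Inverse of decode_cookiejar: pickle the jar's internal state, then base64 it.
    return b64encode(pickle.dumps(cj._cookies)).decode("ascii")

def cookiejar_filter(cj, domain_pattern):
    # Keep only the cookies whose domain matches the given regex.
    filtered = RequestsCookieJar()
    for cookie in cj:
        if re.search(domain_pattern, cookie.domain):
            filtered.set_cookie(cookie)
    return filtered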


# SHA-1 digests of payloads already uploaded, so duplicates are skipped.
done = set()

# PROJECT_SECRET is a JSON object carrying the rclone config text and the
# base64-pickled login cookies.
secret = json.loads(os.environ["PROJECT_SECRET"])
with open("tmp.conf", "w") as f:
    f.write(secret["rclone"])

cj = decode_cookiejar(secret["cookies"])
session = requests.Session()
session.cookies = cj

# URL of the forum topic to scrape, supplied by the task runner.
TOPIC_URL = os.environ["TASK_RECIPE"]

# File hosts the premium downloader is known to handle; links to anything
# else are ignored.
PREMIUM_LINKS = (
    "tusfiles.com", "userscloud.com", "uploaded.net", "ul.to", "uploaded.to", "2shared.com",
    "mediafire.com", "dailyuploads.net", "douploads.net", "centfile.com", "uploadev.org", "intoupload.net",
    "uploadrar.com", "mixloads.com", "ddownload.com", "filezip.cc", "sendit.cloud", "dropapk.to",
    "hulkload.com", "filerio.in", "rapidgator.net", "rg.to", "mega4up.com", "upload.ac", "dropgalaxy.in",
)


def is_supported_premium_dl(link):
    # Match on the exact hostname of the link.
    parsed = urlparse(link.lower())
    return parsed.netloc in PREMIUM_LINKS


def _download(link, i):
    # Name the archive <topic_id><index>_<basename>.gz, sanitizing slashes.
    filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
    r = session.get(link)
    with gzip.open(filename, "wb") as f:
        f.write(r.content)
    # Upload each unique payload once, keyed by the SHA-1 of its contents.
    sha1sum = sha1(r.content).hexdigest()
    if sha1sum not in done:
        subprocess.run(["rclone", "copy", "--config", "tmp.conf", filename, "staging:mobilism/"])
        done.add(sha1sum)


def do_premium_download(link, i):
    # Step 1: submit the host link; the downloader answers with a
    # confirmation form full of hidden fields.
    r = session.post("https://mblservices.org/amember/downloader/downloader/app/index.php", data={
        "link": link,
        "premium_acc": "on",
    }, headers={
        "Content-Type": "application/x-www-form-urlencoded",
    })
    soup = BeautifulSoup(r.content, "html.parser")
    form = soup.find("form")
    if not form:
        # No form usually means a dead link; log anything else for debugging.
        if "The file you were looking for could not be found" not in r.text:
            print(r.content, file=stderr)
        return
    # Step 2: echo the hidden fields back to receive the generated link page.
    r2 = session.post("https://mblservices.org/amember/downloader/downloader/app/index.php", {
        "link": form.find("input", attrs={"name": "link"}).get("value"),
        "referer": form.find("input", attrs={"name": "referer"}).get("value"),
        "filename": form.find("input", attrs={"name": "filename"}).get("value"),
        "host": form.find("input", attrs={"name": "host"}).get("value"),
        "path": form.find("input", attrs={"name": "path"}).get("value"),
    })
    soup2 = BeautifulSoup(r2.content, "html.parser")
    # Step 3: the result page carries the direct URL in the "download"
    # attribute of an anchor tag.
    try:
        download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
        _download(download_link, i)
    except Exception:
        # Typically soup2.find() returned None: no download link on the page.
        if "not found" not in r2.text:
            print(r2.content, file=stderr)


def get_topic_id(topic_url):
    # The topic id is the value of the t= query parameter.
    return re.search(r"[&?]t=([0-9]+)", topic_url).group(1)
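
# For instance (hypothetical URL):
#   get_topic_id("https://forum.mobilism.org/viewtopic.php?t=123456")  # -> "123456"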


def parse_topic(topic_url):
    # Walk every post link in the topic and hand supported hosts to the
    # premium downloader, numbering downloads by their position on the page.
    r = session.get(topic_url)
    soup = BeautifulSoup(r.content, "html.parser")
    for i, elem in enumerate(soup.find_all(class_="postlink")):
        link = elem.get("href")
        if not link:
            continue
        if is_supported_premium_dl(link):
            do_premium_download(link, i)


# _download() reads topic_id as a global, so bind it before parsing starts.
topic_id = get_topic_id(TOPIC_URL)
parse_topic(TOPIC_URL)
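
# A typical invocation might look like this (assumed; the variable names come
# from the environment reads above, the URL and script name are hypothetical):
#   export PROJECT_SECRET='{"rclone": "<rclone.conf contents>", "cookies": "<base64 pickle>"}'
#   export TASK_RECIPE='https://forum.mobilism.org/viewtopic.php?t=123456'
#   python3 mobilism_dl.py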