#!/usr/bin/env python3
"""Download premium-host files linked from a mobilism forum topic.

Reads two environment variables:
  * ``PROJECT_SECRET`` — JSON object whose ``"rclone"`` key holds an rclone
    config, written to ``tmp.conf`` for the upload subprocess.
  * ``TASK_RECIPE``   — URL of the forum topic to scrape.

Flow: log in to forum.mobilism.org, collect ``postlink`` anchors from the
topic page, resolve the first supported premium-host link through the
mblservices.org downloader, then tar the downloaded file together with a
``meta.json`` diagnostic record and upload it via ``rclone copy``.
"""
import json
import os
import re
import subprocess
import tarfile
from io import BytesIO
from sys import stderr
from tarfile import TarFile, TarInfo
from time import time
from urllib.parse import unquote, urlparse

import requests
from bs4 import BeautifulSoup

# Materialize the rclone config from the injected secret so the
# `rclone copy --config tmp.conf` subprocess can read it.
secret = json.loads(os.environ["PROJECT_SECRET"])
with open("tmp.conf", "w") as f:
    f.write(secret["rclone"])

session = requests.Session()
TOPIC_URL = os.environ["TASK_RECIPE"]

# Diagnostic record accumulated during the run; it is serialized into every
# uploaded tarball as meta.json.
METADATA = {"ts": time(), "topic_url": TOPIC_URL}

# Hosts the mblservices.org premium downloader supports.
# See https://forum.mobilism.org/filehosts.xml
# (duplicates from the source list removed — membership is unchanged)
PREMIUM_LINKS = {
    "2shared.com", "billionuploads.com", "centfile.com", "dailyuploads.cc",
    "dailyuploads.net", "ddl.to", "ddownload.com", "douploads.com",
    "douploads.me", "douploads.net", "dropapk.com", "dropapk.to",
    "dropgalaxy.in", "filerio.in", "filetitle.com", "filezip.cc",
    "hulkload.com", "intoupload.net", "mediafire.com", "mega4up.com",
    "mixloads.com", "rapidgator.net", "rapidshare.com", "rg.to",
    "sendit.cloud", "tusfiles.com", "ul.to", "upload.ac", "uploaded.net",
    "uploaded.to", "uploadev.com", "uploadev.org", "uploadrar.com",
    "uploadrar.net", "uplod.it", "userscloud.com",
}


def is_supported_premium_dl(link):
    """Return True when *link*'s host is one of the supported premium hosts."""
    parsed = urlparse(link.lower())
    return parsed.netloc in PREMIUM_LINKS


def add_buf_to_tar(tar: TarFile, filename, data: bytes):
    """Append *data* to *tar* as a member named *filename*."""
    info = TarInfo(name=filename)
    info.size = len(data)
    # BytesIO(data) is already positioned at 0 — no manual write/seek needed.
    tar.addfile(info, BytesIO(data))


def _download(link, i):
    """Fetch *link*, tar it with the run metadata, and rclone it to staging.

    The member/archive name is "<topic_id><i as 2 digits>_<url basename>"
    with any "/" left after unquoting replaced by "_".
    Relies on the module-level ``topic_id`` set by the driver code below.
    """
    filename = "%s%02d_%s" % (
        topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
    r = session.get(link)
    with tarfile.open(filename + ".tar.gz", "w:gz") as tar:
        add_buf_to_tar(tar, filename, r.content)
        add_buf_to_tar(tar, "meta.json", json.dumps(METADATA).encode())
    # check=True: a failed upload must not be silently ignored — the caller's
    # except-handler will then try the next candidate link.
    subprocess.run(
        ["rclone", "copy", "--config", "tmp.conf",
         filename + ".tar.gz", "staging:mobilism/"],
        check=True)


def do_premium_download(link, i):
    """Resolve *link* through the mblservices premium downloader, then download.

    Two-step flow: POST the raw link to get an intermediate form, then re-POST
    that form's hidden fields to obtain the direct download anchor.
    Silently returns when the host reports the file as missing.
    """
    r = session.post(
        "https://mblservices.org/amember/downloader/downloader/app/index.php",
        data={"link": link, "premium_acc": "on"},
        headers={"Content-Type": "application/x-www-form-urlencoded"})
    METADATA["do_premium_download"] = {"link": link, "response": r.text}
    soup = BeautifulSoup(r.content, "html.parser")
    form = soup.find("form")
    if not form:
        # No form means either "file not found" (expected, stay quiet) or an
        # unexpected page — log the latter for debugging.
        if "The file you were looking for could not be found" not in r.text:
            print(r.content, file=stderr)
        return
    data = {
        "link": form.find("input", attrs={"name": "link"}).get("value"),
        "referer": form.find("input", attrs={"name": "referer"}).get("value"),
        "filename": form.find("input", attrs={"name": "filename"}).get("value"),
        "host": form.find("input", attrs={"name": "host"}).get("value"),
        "path": form.find("input", attrs={"name": "path"}).get("value"),
    }
    # "port" is only present for some hosts.
    port_el = form.find("input", attrs={"name": "port"})
    if port_el:
        data["port"] = port_el.get("value")
    r2 = session.post(
        "https://mblservices.org/amember/downloader/downloader/app/index.php",
        data)
    METADATA["do_premium_download2"] = {"data": data, "response": r2.text}
    soup2 = BeautifulSoup(r2.content, "html.parser")
    try:
        download_link = soup2.find(
            "a", attrs={"download": lambda x: x}).get("download")
        _download(download_link, i)
    except Exception:
        if "not found" not in r2.text:
            print(r2.content, file=stderr)
        raise  # bare raise preserves the original traceback


def get_topic_id(topic_url):
    """Extract the numeric phpBB topic id from the ``t=`` query parameter.

    Raises AttributeError when the URL carries no ``t=`` parameter.
    """
    return re.search(r"[&?]t=([0-9]+)", topic_url).group(1)


def parse_topic(topic_url):
    """Scan the topic page's post links and download the first supported one.

    Best-effort: candidate links are tried in page order; a failed download
    moves on to the next candidate, success stops the scan.
    """
    r = session.get(topic_url)
    soup = BeautifulSoup(r.content, "html.parser")
    # BUG FIX: previously recorded the numeric topic_id under the
    # "topic_url" key; record the actual URL.
    METADATA["parse_topic"] = {"topic_url": topic_url, "response": r.text}
    for i, elem in enumerate(soup.find_all(class_="postlink")):
        link = elem.get("href")
        if not link:
            continue
        if is_supported_premium_dl(link):
            try:
                do_premium_download(link, i)
                break
            except Exception:
                # Narrowed from a bare except so Ctrl-C/SystemExit still work;
                # any other failure just means "try the next link".
                continue


def login():
    """Log in to forum.mobilism.org, storing the session cookie in ``session``.

    First GETs the login page to pick up the anti-CSRF ``sid`` hidden field.
    """
    r = session.get("https://forum.mobilism.org/ucp.php?mode=login")
    soup = BeautifulSoup(r.content, "html.parser")
    # NOTE(security): credentials are hardcoded here — they should be moved
    # into PROJECT_SECRET alongside the rclone config.
    r = session.post(
        "https://forum.mobilism.org/ucp.php?mode=login",
        data={
            "username": "78419273891",
            "password": "uprising-5overtly",
            "login": "Login",
            "redirect": ["./ucp.php?mode=login", "index.php"],
            "sid": soup.find("input", attrs={"name": "sid"}).get("value"),
        },
        headers={"Content-Type": "application/x-www-form-urlencoded"})


# Driver: topic_id is module-level because _download() uses it for filenames.
topic_id = get_topic_id(TOPIC_URL)
login()
parse_topic(TOPIC_URL)