mirror of
				https://github.com/simon987/mobilism_scrape.git
				synced 2025-11-04 03:56:52 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			126 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			126 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env python3
 | 
						|
 | 
						|
import gzip
 | 
						|
import json
 | 
						|
import os
 | 
						|
import pickle
 | 
						|
import re
 | 
						|
import subprocess
 | 
						|
from base64 import b64decode
 | 
						|
from sys import stderr
 | 
						|
from urllib.parse import urlparse, unquote
 | 
						|
 | 
						|
import requests
 | 
						|
from bs4 import BeautifulSoup
 | 
						|
 | 
						|
# Task configuration arrives through the environment:
#   PROJECT_SECRET - JSON blob; its "rclone" key holds a ready-made
#                    rclone config used for the upload in _download().
#   TASK_RECIPE    - URL of the forum topic to scrape.
secret = json.loads(os.environ["PROJECT_SECRET"])

# Materialize the rclone config on disk for the subprocess call later.
with open("tmp.conf", "w") as f:
    f.write(secret["rclone"])

# One session shared by every request so the forum login cookie sticks.
session = requests.Session()

TOPIC_URL = os.environ["TASK_RECIPE"]
 | 
						|
 | 
						|
# File-host domains (matched against urlparse().netloc of the lower-cased
# link) that the mblservices premium downloader can resolve; consulted by
# is_supported_premium_dl().
PREMIUM_LINKS = (
    "tusfiles.com", "userscloud.com", "uploaded.net", "ul.to", "uploaded.to", "2shared.com",
    "mediafire.com", "dailyuploads.net", "douploads.net", "centfile.com", "uploadev.org", "intoupload.net",
    "uploadrar.com", "mixloads.com", "ddownload.com", "filezip.cc", "sendit.cloud", "dropapk.to",
    "hulkload.com", "filerio.in", "rapidgator.net", "rg.to", "mega4up.com", "upload.ac", "dropgalaxy.in"
)
 | 
						|
 | 
						|
 | 
						|
def is_supported_premium_dl(link):
    """Return True when *link* is hosted on a file host that the premium
    downloader service knows how to resolve (see PREMIUM_LINKS)."""
    host = urlparse(link.lower()).netloc
    return host in PREMIUM_LINKS
 | 
						|
 | 
						|
 | 
						|
def _download(link, i):
    """Fetch *link*, gzip it to a local file, upload it with rclone, then exit.

    The archive name embeds the module-global ``topic_id`` and the link's
    index *i* so mirrors of the same topic don't collide on disk.  The
    process exits with status 0 after the upload attempt: one task is meant
    to produce exactly one file.
    """
    safe_name = unquote(os.path.basename(link)).replace("/", "_")
    filename = "%s%02d_%s.gz" % (topic_id, i, safe_name)

    # Stream the response instead of buffering the whole file in memory
    # (these hosts serve large archives).
    r = session.get(link, stream=True)

    with gzip.open(filename, "wb") as f:
        for chunk in r.iter_content(chunk_size=65536):
            f.write(chunk)

    result = subprocess.run(["rclone", "copy", "--config", "tmp.conf", filename, "staging:mobilism/"])
    if result.returncode != 0:
        # Best-effort reporting; keep the original "always exit 0" contract.
        print("rclone exited with status %d" % result.returncode, file=stderr)
    quit(0)
 | 
						|
 | 
						|
 | 
						|
def do_premium_download(link, i):
    """Resolve *link* through the mblservices premium downloader and fetch it.

    Two-step flow: POST the original link to obtain a confirmation form,
    re-POST the form's hidden fields, then scrape the resulting
    ``<a download=...>`` anchor and hand its URL to _download().

    Returns silently when the remote reports the file as gone; any other
    unexpected page is dumped to stderr for debugging.

    BUG FIX: the original wrapped the _download() call in a bare
    ``except:``, which also swallowed the SystemExit raised by quit(0)
    inside _download() — so the script never actually stopped after a
    successful download, and the handler then mis-reported the success
    page to stderr.  Only the scrape step is guarded now, and only
    against the AttributeError that ``find()`` returning None produces.
    """
    r = session.post("https://mblservices.org/amember/downloader/downloader/app/index.php", data={
        "link": link,
        "premium_acc": "on"
    }, headers={
        "Content-Type": "application/x-www-form-urlencoded"
    })

    soup = BeautifulSoup(r.content, "html.parser")
    form = soup.find("form")

    # Keep the raw response around for post-mortem debugging.
    with open("debug.do_premium_download.html", "wb") as f:
        f.write(r.content)

    if not form:
        if "The file you were looking for could not be found" not in r.text:
            print(r.content, file=stderr)
        return

    # Echo the confirmation form's hidden fields back to the downloader.
    r2 = session.post("https://mblservices.org/amember/downloader/downloader/app/index.php", {
        "link": form.find("input", attrs={"name": "link"}).get("value"),
        "referer": form.find("input", attrs={"name": "referer"}).get("value"),
        "filename": form.find("input", attrs={"name": "filename"}).get("value"),
        "host": form.find("input", attrs={"name": "host"}).get("value"),
        "path": form.find("input", attrs={"name": "path"}).get("value"),
    })
    soup2 = BeautifulSoup(r2.content, "html.parser")
    try:
        # The anchor carrying a non-empty "download" attribute holds the
        # direct link; find() returns None when it is absent.
        download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
    except AttributeError:
        if "not found" not in r.text:
            print(r2.content, file=stderr)
        return
    _download(download_link, i)
 | 
						|
 | 
						|
 | 
						|
def get_topic_id(topic_url):
    """Extract the numeric phpBB topic id from *topic_url*.

    The id is the value of the ``t`` query parameter, e.g.
    ``viewtopic.php?f=19&t=12345`` -> ``"12345"``.

    Raises:
        ValueError: when the URL carries no ``t=<digits>`` parameter
            (previously this surfaced as a cryptic AttributeError on
            ``None.group``).
    """
    match = re.search(r"[&?]t=([0-9]+)", topic_url)
    if match is None:
        raise ValueError("no topic id (t=...) found in URL: %r" % topic_url)
    return match.group(1)
 | 
						|
 | 
						|
 | 
						|
def parse_topic(topic_url):
    """Scrape a forum topic page and attempt every supported premium link.

    Each ``.postlink`` anchor with an href on a supported file host is
    passed to do_premium_download() together with its position index.
    """
    response = session.get(topic_url)

    # Keep the raw page around for post-mortem debugging.
    with open("debug.parse_topic.html", "wb") as f:
        f.write(response.content)

    page = BeautifulSoup(response.content, "html.parser")

    for index, anchor in enumerate(page.find_all(class_="postlink")):
        href = anchor.get("href")
        if not href:
            continue
        if is_supported_premium_dl(href):
            do_premium_download(href, index)
 | 
						|
 | 
						|
 | 
						|
def login():
    """Authenticate the shared session against forum.mobilism.org.

    Fetches the login page first to harvest the hidden ``sid`` token that
    phpBB requires, then posts the credential form.  The response body is
    saved to disk so a failed login can be diagnosed offline.

    NOTE(review): the credentials are hard-coded below; consider moving
    them into PROJECT_SECRET alongside the rclone config.
    """
    login_url = "https://forum.mobilism.org/ucp.php?mode=login"

    page = session.get(login_url)
    sid = BeautifulSoup(page.content, "html.parser").find("input", attrs={"name": "sid"}).get("value")

    response = session.post(login_url, data={
        "username": "78419273891",
        "password": "uprising-5overtly",
        "login": "Login",
        "redirect": ["./ucp.php?mode=login", "index.php"],
        "sid": sid
    }, headers={
        "Content-Type": "application/x-www-form-urlencoded"
    })
    with open("debug.login.html", "wb") as f:
        f.write(response.content)
 | 
						|
 | 
						|
 | 
						|
# Script entry point: resolve the topic id (used by _download for file
# naming), authenticate, then scrape the topic for premium links.
topic_id = get_topic_id(TOPIC_URL)
login()
parse_topic(TOPIC_URL)
 |