commit 9cf3d5ccee (parent d8753ae800)
Author: simon987
Date:   2020-07-05 19:04:13 -04:00

run (54 changed lines)

@@ -7,37 +7,20 @@ import pickle
 import re
 import subprocess
 from base64 import b64decode
-from hashlib import sha1
 from sys import stderr
 from urllib.parse import urlparse, unquote

 import requests
 from bs4 import BeautifulSoup
-from requests.cookies import RequestsCookieJar
-
-
-def decode_cookiejar(data):
-    cj = RequestsCookieJar()
-    cj._cookies = pickle.loads(b64decode(data))
-    return cj
-
-
-secret = json.loads(os.environ["PROJECT_SECRET"])
-with open("tmp.conf", "w") as f:
-    f.write(secret["rclone"])
-cj = decode_cookiejar(secret["cookies"])
+# secret = json.loads(os.environ["PROJECT_SECRET"])
+# with open("tmp.conf", "w") as f:
+#     f.write(secret["rclone"])
+
+# import browser_cookie3
+# cj = cookiejar_filter(browser_cookie3.firefox(), "forum.mobilism.org|mblservices.org")
+# with open("cookies.txt", "w") as f:
+#     f.write(encode_cookiejar(cj))

 session = requests.Session()
-session.cookies = cj

-TOPIC_URL = os.environ["TASK_RECIPE"]
+# TOPIC_URL = os.environ["TASK_RECIPE"]

 PREMIUM_LINKS = (
     "tusfiles.com", "userscloud.com", "uploaded.net", "ul.to", "uploaded.to", "2shared.com",
@@ -56,8 +39,6 @@ def _download(link, i):
     filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))

     r = session.get(link)
-    with open("debug._download.html", "wb") as f:
-        f.write(r.content)

     with gzip.open(filename, "wb") as f:
         f.write(r.content)
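Note: _download still buffers the whole response in r.content before gzipping it. A streamed variant is sketched below; it is not part of the commit, and the helper name and 64 KiB chunk size are arbitrary:

import gzip

import requests


def download_streamed(session, link, filename):
    # Hypothetical variant: stream the body straight into the gzip file
    # in chunks instead of holding the entire download in memory.
    with session.get(link, stream=True) as r:
        with gzip.open(filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                f.write(chunk)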
@@ -73,8 +54,6 @@ def do_premium_download(link, i):
     }, headers={
         "Content-Type": "application/x-www-form-urlencoded"
     })
-    with open("debug.do_premium_download.html", "wb") as f:
-        f.write(r.content)

     soup = BeautifulSoup(r.content, "html.parser")
     form = soup.find("form")
@@ -91,8 +70,6 @@ def do_premium_download(link, i):
         "host": form.find("input", attrs={"name": "host"}).get("value"),
         "path": form.find("input", attrs={"name": "path"}).get("value"),
     })
-    with open("debug.do_premium_download2.html", "wb") as f:
-        f.write(r.content)

     soup2 = BeautifulSoup(r2.content, "html.parser")
     try:
         download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
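Note: do_premium_download re-posts the intermediate form's hidden inputs (host, path, ...) field by field. A generic one-pass collector, sketched here as an alternative that is not in the commit:

from bs4 import BeautifulSoup


def form_fields(html):
    # Sketch: collect every named <input> of the first form into a dict,
    # ready to be re-posted as the next request's data= payload.
    soup = BeautifulSoup(html, "html.parser")
    form = soup.find("form")
    return {
        inp.get("name"): inp.get("value", "")
        for inp in form.find_all("input")
        if inp.get("name")
    }

This also picks up any extra hidden fields (CSRF tokens and the like) without naming each one.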
@@ -109,8 +86,6 @@ def get_topic_id(topic_url):
 def parse_topic(topic_url):
     r = session.get(topic_url)
-    with open("debug.parse_topic.html", "wb") as f:
-        f.write(r.content)

     soup = BeautifulSoup(r.content, "html.parser")

     for i, elem in enumerate(soup.find_all(class_="postlink")):
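Note: the diff elides how parse_topic decides which postlink hrefs count as premium. A plausible host test against the PREMIUM_LINKS tuple above, purely an assumption about the hidden logic:

from urllib.parse import urlparse


def is_premium_link(href, premium_hosts):
    # Assumption: premium_hosts is the PREMIUM_LINKS tuple; match the
    # link's hostname, allowing subdomains such as www.tusfiles.com.
    host = urlparse(href).netloc.lower()
    return any(host == h or host.endswith("." + h) for h in premium_hosts)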
@@ -122,5 +97,22 @@ def parse_topic(topic_url):
             do_premium_download(link, i)

-topic_id = get_topic_id(TOPIC_URL)
-parse_topic(TOPIC_URL)
+
+def login():
+    r = session.get("https://forum.mobilism.org/ucp.php?mode=login")
+    soup = BeautifulSoup(r.content, "html.parser")
+
+    r = session.post("https://forum.mobilism.org/ucp.php?mode=login", data={
+        "username": "78419273891",
+        "password": "uprising-5overtly",
+        "login": "Login",
+        "redirect": ["./ucp.php?mode=login", "index.php"],
+        "sid": soup.find("input", attrs={"name": "sid"}).get("value")
+    }, headers={
+        "Content-Type": "application/x-www-form-urlencoded"
+    })
+
+
+# topic_id = get_topic_id(TOPIC_URL)
+login()
+# parse_topic(TOPIC_URL)
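Note: the new login() hardcodes credentials and never verifies that authentication succeeded. A hedged sketch of the same phpBB flow with environment-supplied credentials and a success check; the MOBILISM_USER/MOBILISM_PASS variable names are assumptions, not part of the commit:

import os

import requests
from bs4 import BeautifulSoup

LOGIN_URL = "https://forum.mobilism.org/ucp.php?mode=login"


def login_checked(session):
    # Same phpBB flow as the commit's login(): fetch the form to read the
    # hidden "sid" token, then post the credentials along with it.
    r = session.get(LOGIN_URL)
    soup = BeautifulSoup(r.content, "html.parser")
    r = session.post(LOGIN_URL, data={
        # Env-supplied credentials; these variable names are assumptions.
        "username": os.environ["MOBILISM_USER"],
        "password": os.environ["MOBILISM_PASS"],
        "login": "Login",
        "sid": soup.find("input", attrs={"name": "sid"}).get("value"),
    })
    # phpBB only renders a logout link for an authenticated session.
    if "mode=logout" not in r.text:
        raise RuntimeError("Mobilism login failed")


# Usage: session = requests.Session(); login_checked(session)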