mirror of https://github.com/simon987/mobilism_scrape.git (synced 2025-04-19 18:46:46 +00:00)
login

This commit is contained in:
parent d8753ae800
commit 9cf3d5ccee

run | 54 lines changed
@@ -7,37 +7,20 @@ import pickle
 import re
 import subprocess
 from base64 import b64decode
-from hashlib import sha1
 from sys import stderr
 from urllib.parse import urlparse, unquote
 
 import requests
 from bs4 import BeautifulSoup
-from requests.cookies import RequestsCookieJar
 
+# secret = json.loads(os.environ["PROJECT_SECRET"])
 
-def decode_cookiejar(data):
-    cj = RequestsCookieJar()
-    cj._cookies = pickle.loads(b64decode(data))
-    return cj
-
-
-# import browser_cookie3
-# cj = cookiejar_filter(browser_cookie3.firefox(), "forum.mobilism.org|mblservices.org")
-# with open("cookies.txt", "w") as f:
-#     f.write(encode_cookiejar(cj))
-
-secret = json.loads(os.environ["PROJECT_SECRET"])
-
-with open("tmp.conf", "w") as f:
-    f.write(secret["rclone"])
-
-cj = decode_cookiejar(secret["cookies"])
+# with open("tmp.conf", "w") as f:
+#     f.write(secret["rclone"])
 
 session = requests.Session()
-session.cookies = cj
 
-TOPIC_URL = os.environ["TASK_RECIPE"]
+# TOPIC_URL = os.environ["TASK_RECIPE"]
 
 PREMIUM_LINKS = (
     "tusfiles.com", "userscloud.com", "uploaded.net", "ul.to", "uploaded.to", "2shared.com",
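For context on what this hunk deletes: decode_cookiejar rebuilt a RequestsCookieJar from a base64-encoded pickle stored in the PROJECT_SECRET environment variable, and the commented-out browser_cookie3 lines show how that blob was originally captured from a Firefox profile. Here is a minimal round-trip sketch of that serialization; encode_cookiejar never appears in this diff, so its body below is an assumed inverse of decode_cookiejar, not code from the repo.

import pickle
from base64 import b64decode, b64encode

import requests
from requests.cookies import RequestsCookieJar


def encode_cookiejar(cj):
    # Assumed inverse: pickle the jar's internal cookie state and base64 it
    # so it can be stored inside a JSON secret or environment variable.
    return b64encode(pickle.dumps(cj._cookies)).decode()


def decode_cookiejar(data):
    # Same logic as the removed function: rebuild a jar from the blob.
    cj = RequestsCookieJar()
    cj._cookies = pickle.loads(b64decode(data))
    return cj


# Round-trip check: a session's jar survives encode -> decode.
session = requests.Session()
session.cookies = decode_cookiejar(encode_cookiejar(session.cookies))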
@@ -56,8 +39,6 @@ def _download(link, i):
     filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
 
     r = session.get(link)
-    with open("debug._download.html", "wb") as f:
-        f.write(r.content)
 
     with gzip.open(filename, "wb") as f:
         f.write(r.content)
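The surviving _download body still buffers the whole response in memory (r.content) before gzipping it. Below is a streaming variant, a sketch rather than the repo's code; it takes the session and target filename as parameters instead of relying on the script's globals.

import gzip

import requests


def download_streaming(session: requests.Session, link: str, filename: str):
    # Copy the response into the gzip file in 64 KiB chunks instead of
    # buffering r.content, which matters for large premium-host files.
    with session.get(link, stream=True) as r:
        with gzip.open(filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                f.write(chunk)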
@@ -73,8 +54,6 @@ def do_premium_download(link, i):
     }, headers={
         "Content-Type": "application/x-www-form-urlencoded"
     })
-    with open("debug.do_premium_download.html", "wb") as f:
-        f.write(r.content)
 
     soup = BeautifulSoup(r.content, "html.parser")
     form = soup.find("form")
@@ -91,8 +70,6 @@ def do_premium_download(link, i):
         "host": form.find("input", attrs={"name": "host"}).get("value"),
         "path": form.find("input", attrs={"name": "path"}).get("value"),
     })
-    with open("debug.do_premium_download2.html", "wb") as f:
-        f.write(r.content)
     soup2 = BeautifulSoup(r2.content, "html.parser")
     try:
         download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
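These two hunks only strip debug output; the core flow of do_premium_download stays intact: post the premium link to a form, then re-post the hidden link/host/path inputs that come back. A generic sketch of that second step, collecting every named input with BeautifulSoup instead of the three fields the script hardcodes (form_payload is an illustrative helper name, not from the repo):

from bs4 import BeautifulSoup


def form_payload(html: bytes) -> dict:
    # Map name -> value for every named input in the first form on the
    # page; the generic version of pulling link/host/path one by one.
    soup = BeautifulSoup(html, "html.parser")
    form = soup.find("form")
    return {
        inp.get("name"): inp.get("value", "")
        for inp in form.find_all("input")
        if inp.get("name")
    }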
@@ -109,8 +86,6 @@ def get_topic_id(topic_url):
 
 def parse_topic(topic_url):
     r = session.get(topic_url)
-    with open("debug.parse_topic.html", "wb") as f:
-        f.write(r.content)
     soup = BeautifulSoup(r.content, "html.parser")
 
     for i, elem in enumerate(soup.find_all(class_="postlink")):
@@ -122,5 +97,22 @@ def parse_topic(topic_url):
             do_premium_download(link, i)
 
 
-topic_id = get_topic_id(TOPIC_URL)
-parse_topic(TOPIC_URL)
+def login():
+    r = session.get("https://forum.mobilism.org/ucp.php?mode=login")
+    soup = BeautifulSoup(r.content, "html.parser")
+
+    r = session.post("https://forum.mobilism.org/ucp.php?mode=login", data={
+        "username": "78419273891",
+        "password": "uprising-5overtly",
+        "login": "Login",
+        "redirect": ["./ucp.php?mode=login", "index.php"],
+        "sid": soup.find("input", attrs={"name": "sid"}).get("value")
+    }, headers={
+        "Content-Type": "application/x-www-form-urlencoded"
+    })
+
+
+
+# topic_id = get_topic_id(TOPIC_URL)
+login()
+# parse_topic(TOPIC_URL)
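The new login() replaces the pickled-cookie authentication removed above: it first fetches the phpBB login page to scrape the hidden sid input, then posts the credentials back with that sid so the session picks up its cookies normally. Note that the credentials are now hardcoded in the script while the same commit comments out the PROJECT_SECRET handling. Nothing checks that authentication actually succeeded; one possible check is sketched below, where the mode=logout marker is an assumption about phpBB's logged-in markup, not something this diff confirms.

import requests


def login_ok(session: requests.Session) -> bool:
    # A logged-in phpBB page links to ucp.php?mode=logout; an anonymous
    # session does not. Heuristic only.
    r = session.get("https://forum.mobilism.org/ucp.php")
    return b"mode=logout" in r.content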