simon987 2020-07-05 18:45:50 -04:00
parent 672034c690
commit d8753ae800

run | 8 ++++++++

@@ -56,6 +56,8 @@ def _download(link, i):
     filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
     r = session.get(link)
+    with open("debug._download.html", "wb") as f:
+        f.write(r.content)
     with gzip.open(filename, "wb") as f:
         f.write(r.content)
@@ -71,6 +73,8 @@ def do_premium_download(link, i):
     }, headers={
         "Content-Type": "application/x-www-form-urlencoded"
     })
+    with open("debug.do_premium_download.html", "wb") as f:
+        f.write(r.content)
     soup = BeautifulSoup(r.content, "html.parser")
     form = soup.find("form")
@@ -87,6 +91,8 @@ def do_premium_download(link, i):
         "host": form.find("input", attrs={"name": "host"}).get("value"),
         "path": form.find("input", attrs={"name": "path"}).get("value"),
     })
+    with open("debug.do_premium_download2.html", "wb") as f:
+        f.write(r2.content)
     soup2 = BeautifulSoup(r2.content, "html.parser")
     try:
         download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
@@ -103,6 +109,8 @@ def get_topic_id(topic_url):
 def parse_topic(topic_url):
     r = session.get(topic_url)
+    with open("debug.parse_topic.html", "wb") as f:
+        f.write(r.content)
     soup = BeautifulSoup(r.content, "html.parser")
     for i, elem in enumerate(soup.find_all(class_="postlink")):
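
The same two-line debug dump (open a debug.*.html file, write the response body) is repeated after every request in this commit. A minimal sketch of how the pattern could be folded into one helper; the dump_debug name and the DEBUG_DUMP environment-variable gate are assumptions for illustration, not part of this change:

import os

def dump_debug(name, response):
    # Assumed helper: write the raw response body to debug.<name>.html
    # so the fetched page can be inspected when parsing breaks.
    # Gated behind DEBUG_DUMP so normal runs don't write extra files.
    if os.environ.get("DEBUG_DUMP"):
        with open("debug.%s.html" % name, "wb") as f:
            f.write(response.content)

Each call site above would then reduce to a single line such as dump_debug("parse_topic", r).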