mirror of
https://github.com/simon987/mobilism_scrape.git
synced 2025-04-19 18:46:46 +00:00
debug
This commit is contained in:
parent
672034c690
commit
d8753ae800
8
run
8
run
@ -56,6 +56,8 @@ def _download(link, i):
|
|||||||
filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
|
filename = "%s%02d_%s.gz" % (topic_id, i, unquote(os.path.basename(link)).replace("/", "_"))
|
||||||
|
|
||||||
r = session.get(link)
|
r = session.get(link)
|
||||||
|
with open("debug._download.html", "wb") as f:
|
||||||
|
f.write(r.content)
|
||||||
|
|
||||||
with gzip.open(filename, "wb") as f:
|
with gzip.open(filename, "wb") as f:
|
||||||
f.write(r.content)
|
f.write(r.content)
|
||||||
@ -71,6 +73,8 @@ def do_premium_download(link, i):
|
|||||||
}, headers={
|
}, headers={
|
||||||
"Content-Type": "application/x-www-form-urlencoded"
|
"Content-Type": "application/x-www-form-urlencoded"
|
||||||
})
|
})
|
||||||
|
with open("debug.do_premium_download.html", "wb") as f:
|
||||||
|
f.write(r.content)
|
||||||
|
|
||||||
soup = BeautifulSoup(r.content, "html.parser")
|
soup = BeautifulSoup(r.content, "html.parser")
|
||||||
form = soup.find("form")
|
form = soup.find("form")
|
||||||
@ -87,6 +91,8 @@ def do_premium_download(link, i):
|
|||||||
"host": form.find("input", attrs={"name": "host"}).get("value"),
|
"host": form.find("input", attrs={"name": "host"}).get("value"),
|
||||||
"path": form.find("input", attrs={"name": "path"}).get("value"),
|
"path": form.find("input", attrs={"name": "path"}).get("value"),
|
||||||
})
|
})
|
||||||
|
with open("debug.do_premium_download2.html", "wb") as f:
|
||||||
|
f.write(r.content)
|
||||||
soup2 = BeautifulSoup(r2.content, "html.parser")
|
soup2 = BeautifulSoup(r2.content, "html.parser")
|
||||||
try:
|
try:
|
||||||
download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
|
download_link = soup2.find("a", attrs={"download": lambda x: x}).get("download")
|
||||||
@ -103,6 +109,8 @@ def get_topic_id(topic_url):
|
|||||||
|
|
||||||
def parse_topic(topic_url):
|
def parse_topic(topic_url):
|
||||||
r = session.get(topic_url)
|
r = session.get(topic_url)
|
||||||
|
with open("debug.parse_topic.html", "wb") as f:
|
||||||
|
f.write(r.content)
|
||||||
soup = BeautifulSoup(r.content, "html.parser")
|
soup = BeautifulSoup(r.content, "html.parser")
|
||||||
|
|
||||||
for i, elem in enumerate(soup.find_all(class_="postlink")):
|
for i, elem in enumerate(soup.find_all(class_="postlink")):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user