mirror of
https://github.com/simon987/opendirectories-bot.git
synced 2025-04-20 02:36:45 +00:00
Fixed a problematic website
This commit is contained in:
parent
88cebae047
commit
8e1f4543fd
@ -68,12 +68,10 @@ class PageParser:
|
|||||||
return "d"
|
return "d"
|
||||||
return "f"
|
return "f"
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def clean_page(text):
|
def clean_page(text):
|
||||||
text = text.replace("<A", "<a")
|
text = text.replace("<A", "<a")
|
||||||
text = text.replace("</A", "</a")
|
text = text.replace("</A", "</a")
|
||||||
# text = text.replace("&amp;", "&")
|
|
||||||
text = text.replace("<hr>", "")
|
text = text.replace("<hr>", "")
|
||||||
|
|
||||||
return text
|
return text
|
||||||
@ -108,8 +106,8 @@ class NginxParser(PageParser):
|
|||||||
|
|
||||||
soup = BeautifulSoup(text, "html.parser")
|
soup = BeautifulSoup(text, "html.parser")
|
||||||
|
|
||||||
for link in soup.find("pre").find_all("a"):
|
for pre in soup.find_all("pre"):
|
||||||
|
for link in pre.find_all("a"):
|
||||||
parsed_link = self.parse_link(link, text, base_url)
|
parsed_link = self.parse_link(link, text, base_url)
|
||||||
if parsed_link is not None:
|
if parsed_link is not None:
|
||||||
links[parsed_link[0]] = parsed_link[1]
|
links[parsed_link[0]] = parsed_link[1]
|
||||||
|
@ -7,7 +7,6 @@ https://zeus.feralhosting.com/matt07211/Anime-OST/ (root is nginx, /Flac is Apac
|
|||||||
https://filepursuit.com/ (recursion problem - not an OD)
|
https://filepursuit.com/ (recursion problem - not an OD)
|
||||||
https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded)
|
https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded)
|
||||||
|
|
||||||
http://www.gamers.org/pub/archives/uwp-uml/ (?)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -24,3 +23,4 @@ https://www.datto.com/resource-downloads/
|
|||||||
https://www.annmariegianni.com/wp-content/uploads/
|
https://www.annmariegianni.com/wp-content/uploads/
|
||||||
http://archive.scene.org/pub/resources/docs/bbs_finland/
|
http://archive.scene.org/pub/resources/docs/bbs_finland/
|
||||||
http://dl.apkhome.org
|
http://dl.apkhome.org
|
||||||
|
http://www.gamers.org/pub/archives/uwp-uml/
|
Loading…
x
Reference in New Issue
Block a user