mirror of
https://github.com/simon987/opendirectories-bot.git
synced 2025-04-10 14:06:44 +00:00
Fixed a problematic website
This commit is contained in:
parent
88cebae047
commit
8e1f4543fd
12
parser.py
12
parser.py
@ -68,12 +68,10 @@ class PageParser:
|
||||
return "d"
|
||||
return "f"
|
||||
|
||||
|
||||
@staticmethod
|
||||
def clean_page(text):
|
||||
text = text.replace("<A", "<a")
|
||||
text = text.replace("</A", "</a")
|
||||
# text = text.replace("&amp;", "&")
|
||||
text = text.replace("<hr>", "")
|
||||
|
||||
return text
|
||||
@ -108,11 +106,11 @@ class NginxParser(PageParser):
|
||||
|
||||
soup = BeautifulSoup(text, "html.parser")
|
||||
|
||||
for link in soup.find("pre").find_all("a"):
|
||||
|
||||
parsed_link = self.parse_link(link, text, base_url)
|
||||
if parsed_link is not None:
|
||||
links[parsed_link[0]] = parsed_link[1]
|
||||
for pre in soup.find_all("pre"):
|
||||
for link in pre.find_all("a"):
|
||||
parsed_link = self.parse_link(link, text, base_url)
|
||||
if parsed_link is not None:
|
||||
links[parsed_link[0]] = parsed_link[1]
|
||||
|
||||
return links
|
||||
|
||||
|
@ -7,7 +7,6 @@ https://zeus.feralhosting.com/matt07211/Anime-OST/ (root is nginx, /Flac is Apac
|
||||
https://filepursuit.com/ (recursion problem - not an OD)
|
||||
https://drive.google.com/drive/folders/0BzylFUcRnoTZflc3Z3Z0eDFuV0Q4M2ROUlg5UWNWLWs3WDBtNFZZUkdqUmxqQm9nd0VPclk (Should be excluded)
|
||||
|
||||
http://www.gamers.org/pub/archives/uwp-uml/ (?)
|
||||
|
||||
|
||||
|
||||
@ -23,4 +22,5 @@ http://www.serenitystreetnews.com/videos/
|
||||
https://www.datto.com/resource-downloads/
|
||||
https://www.annmariegianni.com/wp-content/uploads/
|
||||
http://archive.scene.org/pub/resources/docs/bbs_finland/
|
||||
http://dl.apkhome.org
|
||||
http://dl.apkhome.org
|
||||
http://www.gamers.org/pub/archives/uwp-uml/
|
Loading…
x
Reference in New Issue
Block a user