Fixed a problematic website: NginxParser now collects links from every <pre> element instead of only the first

2025-12-14 07:39:04 +00:00 · 2018-02-06 17:15:50 -05:00
parent 88cebae047
commit 8e1f4543fd
2 changed files with 7 additions and 9 deletions
--- a/parser.py
+++ b/parser.py
@@ -68,12 +68,10 @@ class PageParser:
            return "d"
        return "f"

-
    @staticmethod
    def clean_page(text):
        text = text.replace("<A", "<a")
        text = text.replace("</A", "</a")
-        # text = text.replace("&", "&amp;")
        text = text.replace("<hr>", "")

        return text
@@ -108,11 +106,11 @@ class NginxParser(PageParser):

        soup = BeautifulSoup(text, "html.parser")

-        for link in soup.find("pre").find_all("a"):
-
-            parsed_link = self.parse_link(link, text, base_url)
-            if parsed_link is not None:
-                links[parsed_link[0]] = parsed_link[1]
+        for pre in soup.find_all("pre"):
+            for link in pre.find_all("a"):
+                parsed_link = self.parse_link(link, text, base_url)
+                if parsed_link is not None:
+                    links[parsed_link[0]] = parsed_link[1]

        return links