mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 10:26:44 +00:00
Attempt to fix unicode decode errors
This commit is contained in:
parent
9d0a0a8b42
commit
c309aa25c8
@ -102,6 +102,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
def _parse_links(self, body: bytes) -> list:
|
def _parse_links(self, body: bytes) -> list:
|
||||||
|
|
||||||
result = list()
|
result = list()
|
||||||
|
try:
|
||||||
tree = etree.HTML(body, parser=self.parser)
|
tree = etree.HTML(body, parser=self.parser)
|
||||||
links = []
|
links = []
|
||||||
try:
|
try:
|
||||||
@ -111,6 +112,16 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
|
|
||||||
for link in links:
|
for link in links:
|
||||||
result.append((link.text, link.get("href")))
|
result.append((link.text, link.get("href")))
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
tree = etree.HTML(body.decode("utf-8", errors="ignore"), parser=self.parser)
|
||||||
|
links = []
|
||||||
|
try:
|
||||||
|
links = tree.findall(".//a/[@href]")
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
for link in links:
|
||||||
|
result.append((link.text, link.get("href")))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user