diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py index 2775703..2547da0 100644 --- a/crawl_server/remote_http.py +++ b/crawl_server/remote_http.py @@ -164,7 +164,7 @@ class HttpDirectory(RemoteDirectory): parser = HTMLAnchorParser() for chunk in body: - parser.feed(chunk.decode("utf-8")) + parser.feed(chunk.decode("utf-8", errors="ignore")) for anchor in parser.anchors: yield anchor