mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 10:26:44 +00:00
Should fix unknown-encoding errors; also removes HTTPS warnings
This commit is contained in:
parent
80aa8933e6
commit
098ad2be72
@ -1,5 +1,4 @@
|
|||||||
from urllib.parse import unquote, urljoin
|
from urllib.parse import unquote, urljoin
|
||||||
import warnings
|
|
||||||
import os
|
import os
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
@ -11,6 +10,9 @@ import config
|
|||||||
from dateutil.parser import parse as parse_date
|
from dateutil.parser import parse as parse_date
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
import urllib3
|
||||||
|
urllib3.disable_warnings()
|
||||||
|
|
||||||
|
|
||||||
class Anchor:
|
class Anchor:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -134,8 +136,6 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
|
|
||||||
def _request_file(self, url):
|
def _request_file(self, url):
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
retries = HttpDirectory.MAX_RETRIES
|
retries = HttpDirectory.MAX_RETRIES
|
||||||
while retries > 0:
|
while retries > 0:
|
||||||
try:
|
try:
|
||||||
@ -159,14 +159,16 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def _stream_body(self, url: str):
|
def _stream_body(self, url: str):
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
retries = HttpDirectory.MAX_RETRIES
|
retries = HttpDirectory.MAX_RETRIES
|
||||||
while retries > 0:
|
while retries > 0:
|
||||||
try:
|
try:
|
||||||
r = self.session.get(url, stream=True, timeout=40)
|
r = self.session.get(url, stream=True, timeout=40)
|
||||||
for chunk in r.iter_content(chunk_size=4096):
|
for chunk in r.iter_content(chunk_size=4096):
|
||||||
|
try:
|
||||||
yield chunk.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
yield chunk.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
||||||
|
except LookupError:
|
||||||
|
# Unsupported encoding
|
||||||
|
yield chunk.decode("utf-8", errors="ignore")
|
||||||
r.close()
|
r.close()
|
||||||
del r
|
del r
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user