mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Request content is read all at once
This commit is contained in:
parent
78d1b7a5bd
commit
cc4c70f400
@ -104,7 +104,7 @@ class HttpDirectory(RemoteDirectory):
|
||||
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
|
||||
path_identifier = hashlib.md5(current_dir_name.encode())
|
||||
path_url = urljoin(self.base_url, path, "")
|
||||
body = self._stream_body(path_url)
|
||||
body = self._fetch_body(path_url)
|
||||
anchors = self._parse_links(body)
|
||||
|
||||
urls_to_request = []
|
||||
@ -176,19 +176,16 @@ class HttpDirectory(RemoteDirectory):
|
||||
logger.debug("TimeoutError - _request_file")
|
||||
raise TimeoutError
|
||||
|
||||
def _stream_body(self, url: str):
|
||||
def _fetch_body(self, url: str):
|
||||
retries = HttpDirectory.MAX_RETRIES
|
||||
while retries > 0:
|
||||
try:
|
||||
r = self.session.get(url, stream=True, timeout=HttpDirectory.TIMEOUT)
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
try:
|
||||
yield chunk.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
||||
except LookupError:
|
||||
# Unsupported encoding
|
||||
yield chunk.decode("utf-8", errors="ignore")
|
||||
r.close()
|
||||
return
|
||||
r = self.session.get(url, timeout=HttpDirectory.TIMEOUT)
|
||||
try:
|
||||
return r.content.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
||||
except LookupError:
|
||||
# Unsupported encoding
|
||||
return r.content.decode("utf-8", errors="ignore")
|
||||
except RequestException:
|
||||
self.session.close()
|
||||
retries -= 1
|
||||
@ -200,14 +197,8 @@ class HttpDirectory(RemoteDirectory):
|
||||
def _parse_links(body):
|
||||
|
||||
parser = HTMLAnchorParser()
|
||||
anchors = []
|
||||
|
||||
for chunk in body:
|
||||
parser.feed(chunk)
|
||||
for anchor in parser.anchors:
|
||||
anchors.append(anchor)
|
||||
|
||||
return anchors
|
||||
parser.feed(body)
|
||||
return parser.anchors
|
||||
|
||||
@staticmethod
|
||||
def _isdir(link: Anchor):
|
||||
|
21
test/files/apache_table.html
Normal file
21
test/files/apache_table.html
Normal file
@ -0,0 +1,21 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Index of /Public/bootstrap</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Index of /Public/bootstrap</h1>
|
||||
<table>
|
||||
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
|
||||
<tr><th colspan="5"><hr></th></tr>
|
||||
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="/Public/">Parent Directory</a> </td><td> </td><td align="right"> - </td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="bower.json">bower.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">1.0K</td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="css/">css/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="image/">image/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="js/">js/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="less/">less/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="package.json">package.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">666 </td><td> </td></tr>
|
||||
<tr><th colspan="5"><hr></th></tr>
|
||||
</table>
|
||||
</body></html>
|
||||
|
13
test/webserver.py
Normal file
13
test/webserver.py
Normal file
@ -0,0 +1,13 @@
|
||||
from flask import Flask, send_file
|
||||
|
||||
# Flask application object that the test routes below are registered on.
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/test1/")
def test1():
    """Serve the static Apache-style index page fixture at ``/test1/``."""
    fixture_path = "files/apache_table.html"
    return send_file(fixture_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Listen on every interface on port 8888, serving requests in threads.
    app.run(host="0.0.0.0", port=8888, threaded=True)
|
||||
|
Loading…
x
Reference in New Issue
Block a user