Request content is read all at once

This commit is contained in:
Simon 2018-08-11 13:05:24 -04:00
parent 78d1b7a5bd
commit cc4c70f400
3 changed files with 44 additions and 19 deletions

View File

@ -104,7 +104,7 @@ class HttpDirectory(RemoteDirectory):
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1] current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
path_identifier = hashlib.md5(current_dir_name.encode()) path_identifier = hashlib.md5(current_dir_name.encode())
path_url = urljoin(self.base_url, path, "") path_url = urljoin(self.base_url, path, "")
body = self._stream_body(path_url) body = self._fetch_body(path_url)
anchors = self._parse_links(body) anchors = self._parse_links(body)
urls_to_request = [] urls_to_request = []
@ -176,19 +176,16 @@ class HttpDirectory(RemoteDirectory):
logger.debug("TimeoutError - _request_file") logger.debug("TimeoutError - _request_file")
raise TimeoutError raise TimeoutError
def _stream_body(self, url: str): def _fetch_body(self, url: str):
retries = HttpDirectory.MAX_RETRIES retries = HttpDirectory.MAX_RETRIES
while retries > 0: while retries > 0:
try: try:
r = self.session.get(url, stream=True, timeout=HttpDirectory.TIMEOUT) r = self.session.get(url, timeout=HttpDirectory.TIMEOUT)
for chunk in r.iter_content(chunk_size=8192):
try: try:
yield chunk.decode(r.encoding if r.encoding else "utf-8", errors="ignore") return r.content.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
except LookupError: except LookupError:
# Unsupported encoding # Unsupported encoding
yield chunk.decode("utf-8", errors="ignore") return r.content.decode("utf-8", errors="ignore")
r.close()
return
except RequestException: except RequestException:
self.session.close() self.session.close()
retries -= 1 retries -= 1
@ -200,14 +197,8 @@ class HttpDirectory(RemoteDirectory):
def _parse_links(body): def _parse_links(body):
parser = HTMLAnchorParser() parser = HTMLAnchorParser()
anchors = [] parser.feed(body)
return parser.anchors
for chunk in body:
parser.feed(chunk)
for anchor in parser.anchors:
anchors.append(anchor)
return anchors
@staticmethod @staticmethod
def _isdir(link: Anchor): def _isdir(link: Anchor):

View File

@ -0,0 +1,21 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /Public/bootstrap</title>
</head>
<body>
<h1>Index of /Public/bootstrap</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="/Public/">Parent Directory</a> </td><td>&nbsp;</td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="bower.json">bower.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">1.0K</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="css/">css/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="image/">image/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="js/">js/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="less/">less/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="package.json">package.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">666 </td><td>&nbsp;</td></tr>
<tr><th colspan="5"><hr></th></tr>
</table>
</body></html>

13
test/webserver.py Normal file
View File

@ -0,0 +1,13 @@
from flask import Flask, send_file
# Flask application object; the route handler in this file registers on it.
app = Flask(__name__)
@app.route("/test1/")
def test1():
    """Respond to ``/test1/`` with the contents of ``files/apache_table.html``."""
    fixture_path = "files/apache_table.html"
    return send_file(fixture_path)
if __name__ == '__main__':
    # Start the server on port 8888, listening on all interfaces ("0.0.0.0"),
    # with threaded request handling enabled.
    app.run(host="0.0.0.0", port=8888, threaded=True)