mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Request content is read all at once
This commit is contained in:
parent
78d1b7a5bd
commit
cc4c70f400
@ -104,7 +104,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
|
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
|
||||||
path_identifier = hashlib.md5(current_dir_name.encode())
|
path_identifier = hashlib.md5(current_dir_name.encode())
|
||||||
path_url = urljoin(self.base_url, path, "")
|
path_url = urljoin(self.base_url, path, "")
|
||||||
body = self._stream_body(path_url)
|
body = self._fetch_body(path_url)
|
||||||
anchors = self._parse_links(body)
|
anchors = self._parse_links(body)
|
||||||
|
|
||||||
urls_to_request = []
|
urls_to_request = []
|
||||||
@ -176,19 +176,16 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
logger.debug("TimeoutError - _request_file")
|
logger.debug("TimeoutError - _request_file")
|
||||||
raise TimeoutError
|
raise TimeoutError
|
||||||
|
|
||||||
def _stream_body(self, url: str):
|
def _fetch_body(self, url: str):
|
||||||
retries = HttpDirectory.MAX_RETRIES
|
retries = HttpDirectory.MAX_RETRIES
|
||||||
while retries > 0:
|
while retries > 0:
|
||||||
try:
|
try:
|
||||||
r = self.session.get(url, stream=True, timeout=HttpDirectory.TIMEOUT)
|
r = self.session.get(url, timeout=HttpDirectory.TIMEOUT)
|
||||||
for chunk in r.iter_content(chunk_size=8192):
|
|
||||||
try:
|
try:
|
||||||
yield chunk.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
return r.content.decode(r.encoding if r.encoding else "utf-8", errors="ignore")
|
||||||
except LookupError:
|
except LookupError:
|
||||||
# Unsupported encoding
|
# Unsupported encoding
|
||||||
yield chunk.decode("utf-8", errors="ignore")
|
return r.content.decode("utf-8", errors="ignore")
|
||||||
r.close()
|
|
||||||
return
|
|
||||||
except RequestException:
|
except RequestException:
|
||||||
self.session.close()
|
self.session.close()
|
||||||
retries -= 1
|
retries -= 1
|
||||||
@ -200,14 +197,8 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
def _parse_links(body):
|
def _parse_links(body):
|
||||||
|
|
||||||
parser = HTMLAnchorParser()
|
parser = HTMLAnchorParser()
|
||||||
anchors = []
|
parser.feed(body)
|
||||||
|
return parser.anchors
|
||||||
for chunk in body:
|
|
||||||
parser.feed(chunk)
|
|
||||||
for anchor in parser.anchors:
|
|
||||||
anchors.append(anchor)
|
|
||||||
|
|
||||||
return anchors
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _isdir(link: Anchor):
|
def _isdir(link: Anchor):
|
||||||
|
21
test/files/apache_table.html
Normal file
21
test/files/apache_table.html
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Index of /Public/bootstrap</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Index of /Public/bootstrap</h1>
|
||||||
|
<table>
|
||||||
|
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
|
||||||
|
<tr><th colspan="5"><hr></th></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="/Public/">Parent Directory</a> </td><td> </td><td align="right"> - </td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="bower.json">bower.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">1.0K</td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="css/">css/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="image/">image/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="js/">js/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="less/">less/</a> </td><td align="right">2017-09-07 18:03 </td><td align="right"> - </td><td> </td></tr>
|
||||||
|
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="package.json">package.json</a> </td><td align="right">2017-04-05 01:45 </td><td align="right">666 </td><td> </td></tr>
|
||||||
|
<tr><th colspan="5"><hr></th></tr>
|
||||||
|
</table>
|
||||||
|
</body></html>
|
||||||
|
|
13
test/webserver.py
Normal file
13
test/webserver.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from flask import Flask, send_file
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/test1/")
|
||||||
|
def test1():
|
||||||
|
return send_file("files/apache_table.html")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run("0.0.0.0", port=8888, threaded=True)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user