Fixed bug in crawler when file count in a directory is greater than 150

Simon 2018-07-17 11:03:10 -04:00
parent cf96d1697d
commit 756e331c83


@@ -119,7 +119,7 @@ class HttpDirectory(RemoteDirectory):
             if self._isdir(anchor):
                 directory = File(
                     name=anchor.href,  # todo handle external links here
                     mtime=0,
                     size=0,
                     path=path,
@@ -143,7 +143,9 @@ class HttpDirectory(RemoteDirectory):
             pool = ThreadPool(processes=10)
             files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
             pool.close()
-            return (f for f in files if f)
+            for file in files:
+                if file:
+                    yield file
         else:
             # Too few urls to create thread pool
             for url in urls_to_request:
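
The fix matters because this method is a generator: once a function body contains a `yield`, a bare `return <expression>` no longer hands that value to the caller, it simply ends iteration and the expression is discarded. So the thread-pool branch (presumably the path taken once a directory exceeds the 150-file threshold from the commit title) silently produced no files. A minimal standalone sketch of the difference, with illustrative names rather than the crawler's actual API:

def broken(items):
    if len(items) > 2:  # stand-in for the "many files, use the thread pool" branch
        # Inside a generator function, `return <expr>` only stops iteration;
        # the generator expression is discarded and the caller sees nothing.
        return (i for i in items if i)
    for i in items:
        if i:
            yield i

def fixed(items):
    if len(items) > 2:
        # Re-yield each result so the enclosing generator actually produces them.
        for i in items:
            if i:
                yield i
        return
    for i in items:
        if i:
            yield i

print(list(broken([1, 2, 3, 0])))  # [] -- the large-directory branch yields nothing
print(list(fixed([1, 2, 3, 0])))   # [1, 2, 3]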