mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Fixed bug in crawler when file count in a directory is greater than 150
This commit is contained in:
parent
cf96d1697d
commit
756e331c83
@ -119,7 +119,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
if self._isdir(anchor):
|
if self._isdir(anchor):
|
||||||
|
|
||||||
directory = File(
|
directory = File(
|
||||||
name=anchor.href, # todo handle external links here
|
name=anchor.href, # todo handle external links here
|
||||||
mtime=0,
|
mtime=0,
|
||||||
size=0,
|
size=0,
|
||||||
path=path,
|
path=path,
|
||||||
@ -143,7 +143,9 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
pool = ThreadPool(processes=10)
|
pool = ThreadPool(processes=10)
|
||||||
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
|
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
|
||||||
pool.close()
|
pool.close()
|
||||||
return (f for f in files if f)
|
for file in files:
|
||||||
|
if file:
|
||||||
|
yield file
|
||||||
else:
|
else:
|
||||||
# Too few urls to create thread pool
|
# Too few urls to create thread pool
|
||||||
for url in urls_to_request:
|
for url in urls_to_request:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user