More FTP crawler bug fixes

Simon 2018-06-24 18:05:30 -04:00
parent ab35ce96cc
commit e11343de23
3 changed files with 3 additions and 4 deletions

@@ -138,7 +138,6 @@ class RemoteDirectoryCrawler:
                 path = in_q.get(timeout=150)
             except Empty:
                 directory.close()
-                print("Directory timed out")
                 break

             if path is None:

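For context, the loop this hunk touches looks roughly like the sketch below, assuming a standard queue.Queue named in_q and a remote directory object with a close() method; only the identifiers visible in the diff are taken from the project, the surrounding structure is illustrative.

import queue

def crawl_worker(in_q, directory):
    while True:
        try:
            # Wait up to 150 s for the next path to list.
            path = in_q.get(timeout=150)
        except queue.Empty:
            # Nothing arrived in time: release the connection and stop the
            # worker quietly (this commit drops the old debug print here).
            directory.close()
            break
        if path is None:
            # Presumably a shutdown sentinel pushed by the producer.
            break
        # ... list `path` and queue up the results ...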

@@ -30,7 +30,8 @@ class FtpDirectory(RemoteDirectory):
         self.ftp = ftputil.FTPHost(self.base_url, "anonymous", "od-database", session_factory=session_factory(
             use_passive_mode=True
         ))
-        self.ftp._session.timeout = 1
+        self.ftp.keep_alive()
+        self.ftp._session.timeout = 30

     def stop_when_connected(self):
         failed_attempts = 0
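
Condensed, the connection setup after this hunk amounts to the sketch below, assuming the session_factory in the diff is ftputil.session.session_factory and noting that _session is a private ftputil attribute, so the timeout tweak relies on implementation details (the open_ftp wrapper name is made up):

import ftputil
from ftputil.session import session_factory

def open_ftp(base_url):
    ftp = ftputil.FTPHost(base_url, "anonymous", "od-database",
                          session_factory=session_factory(use_passive_mode=True))
    ftp.keep_alive()           # nudge the server so the idle control connection stays open
    ftp._session.timeout = 30  # widen the underlying ftplib timeout (previously 1 second)
    return ftp
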
@@ -96,10 +97,8 @@ class FtpDirectory(RemoteDirectory):
         return path, []

     def reconnect(self):
         if self.ftp:
             self.ftp.close()
-            time.sleep(8)
         self.stop_when_connected()

     def try_stat(self, path):

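This hunk drops the fixed time.sleep(8) before reconnecting, presumably because stop_when_connected already paces its own retries (it tracks failed_attempts in the hunk above). A rough skeleton of that shape, with the retry limit, delay, and _open_connection helper invented for illustration:

import time

class FtpDirectorySketch:
    # Illustrative skeleton only; it mirrors the reconnect()/stop_when_connected()
    # shape visible in the diff, not the project's actual implementation.

    def __init__(self, base_url):
        self.base_url = base_url
        self.ftp = None

    def _open_connection(self):
        raise NotImplementedError  # stand-in for the FTPHost setup shown above

    def stop_when_connected(self, max_attempts=5, delay=2):
        failed_attempts = 0
        while failed_attempts < max_attempts:
            try:
                self.ftp = self._open_connection()
                return
            except Exception:
                failed_attempts += 1
                time.sleep(delay)  # back off between attempts, not in reconnect()
        raise ConnectionError("could not connect to " + self.base_url)

    def reconnect(self):
        # Close the stale control connection and retry immediately;
        # the unconditional 8-second sleep is gone.
        if self.ftp:
            self.ftp.close()
        self.stop_when_connected()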

@@ -83,6 +83,7 @@ category_map = {
     'ppm': 'image', 'xpm': 'image', 'xbm': 'image',
     'rgb': 'image', 'svg': 'image', 'psd': 'image',
     'yuv': 'image', 'ai': 'image', 'eps': 'image',
+    'bw': 'image',
     # Archive category
     'ar': 'archive', 'cpio': 'archive', 'shar': 'archive',
     'iso': 'archive', 'lbr': 'archive', 'mar': 'archive',
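
The last hunk just files the 'bw' extension under the image category. For reference, a map like this is typically consulted per file name along the lines below; the categorize helper and its default are made up for illustration:

import os

category_map = {
    'yuv': 'image', 'ai': 'image', 'eps': 'image',
    'bw': 'image',   # the extension added by this commit
    'ar': 'archive', 'cpio': 'archive', 'shar': 'archive',
    # ... remaining extensions elided ...
}

def categorize(filename, default=None):
    # Normalise the extension (drop the dot, lower-case it) and look it up,
    # falling back to `default` for unknown file types.
    ext = os.path.splitext(filename)[1].lstrip('.').lower()
    return category_map.get(ext, default)

print(categorize("texture.bw"))   # image
print(categorize("notes.txt"))    # None (not in this abridged map)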