From e11343de2330de1711d249333fd69f92b2bbe71e Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 24 Jun 2018 18:05:30 -0400 Subject: [PATCH] More FTP crawler bug fixes --- crawl_server/crawler.py | 1 - crawl_server/remote_ftp.py | 5 ++--- od_util.py | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crawl_server/crawler.py b/crawl_server/crawler.py index dcbfa7d..8ac4616 100644 --- a/crawl_server/crawler.py +++ b/crawl_server/crawler.py @@ -138,7 +138,6 @@ class RemoteDirectoryCrawler: path = in_q.get(timeout=150) except Empty: directory.close() - print("Directory timed out") break if path is None: diff --git a/crawl_server/remote_ftp.py b/crawl_server/remote_ftp.py index df8dc24..0883304 100644 --- a/crawl_server/remote_ftp.py +++ b/crawl_server/remote_ftp.py @@ -30,7 +30,8 @@ class FtpDirectory(RemoteDirectory): self.ftp = ftputil.FTPHost(self.base_url, "anonymous", "od-database", session_factory=session_factory( use_passive_mode=True )) - self.ftp._session.timeout = 1 + self.ftp.keep_alive() + self.ftp._session.timeout = 30 def stop_when_connected(self): failed_attempts = 0 @@ -96,10 +97,8 @@ class FtpDirectory(RemoteDirectory): return path, [] def reconnect(self): - if self.ftp: self.ftp.close() - time.sleep(8) self.stop_when_connected() def try_stat(self, path): diff --git a/od_util.py b/od_util.py index 0001334..21ba503 100644 --- a/od_util.py +++ b/od_util.py @@ -83,6 +83,7 @@ category_map = { 'ppm': 'image', 'xpm': 'image', 'xbm': 'image', 'rgb': 'image', 'svg': 'image', 'psd': 'image', 'yuv': 'image', 'ai': 'image', 'eps': 'image', + 'bw': 'image', # Archive category 'ar': 'archive', 'cpio': 'archive', 'shar': 'archive', 'iso': 'archive', 'lbr': 'archive', 'mar': 'archive',